def test_create_category_index_from_labelmap(self):
  label_map_string = """
    item {
      id: 2
      name: 'cat'
    }
    item {
      id: 1
      name: 'dog'
    }
  """
  label_map_path = os.path.join(self.get_temp_dir(), 'label_map.pbtxt')
  with tf.gfile.Open(label_map_path, 'wb') as f:
    f.write(label_map_string)
  category_index = label_map_util.create_category_index_from_labelmap(
      label_map_path)
  self.assertDictEqual(
      {
          1: {'name': u'dog', 'id': 1},
          2: {'name': u'cat', 'id': 2},
      }, category_index)
def run(self):
    self.log(" Starting robot who " + self.getWho() +
             " kind " + self.config.getKind() +
             " instanceType " + str(self.config.getInstanceType()))

    # Download and extract the frozen inference graph if it is not present yet.
    if not os.path.exists(TensorFlowClassification.PATH_TO_FROZEN_GRAPH):
        opener = urllib.request.URLopener()
        opener.retrieve(TensorFlowClassification.DOWNLOAD_BASE +
                        TensorFlowClassification.MODEL_FILE,
                        TensorFlowClassification.MODEL_FILE)
        tar_file = tarfile.open(TensorFlowClassification.MODEL_FILE)
        for file in tar_file.getmembers():
            file_name = os.path.basename(file.name)
            if TensorFlowClassification.FROZEN_GRAPH_PB_NAME in file_name:
                tar_file.extract(file, Sensation.DATADIR)

    # Load the frozen graph into memory.
    TensorFlowClassification.detection_graph = tf.Graph()
    with TensorFlowClassification.detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(TensorFlowClassification.PATH_TO_FROZEN_GRAPH,
                            'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name='')

    TensorFlowClassification.category_index = \
        label_map_util.create_category_index_from_labelmap(
            TensorFlowClassification.PATH_TO_LABELS, use_display_name=True)

    self.running = True  # live until stopped
    self.mode = Sensation.Mode.Normal

    for image_path in self.TEST_IMAGE_PATHS:
        image = PIL_Image.open(image_path)
        # The array-based representation of the image will be used later to
        # prepare the result image with boxes and labels on it.
        image_np = self.load_image_into_numpy_array(image)
        # Expand dimensions since the model expects images to have shape:
        # [1, None, None, 3]
        image_np_expanded = np.expand_dims(image_np, axis=0)
        # Actual detection.
        # TODO: clarify where detection_graph is defined (class attribute vs.
        # instance attribute).
        output_dict = self.run_inference_for_single_image(
            image_np_expanded, self.detection_graph)
        for i, classInd in enumerate(output_dict[self.DETECTION_CLASSES]):
            self.log("image className " +
                     self.category_index[classInd][self.NAME] +
                     ' score ' + str(output_dict[self.DETECTION_SCORES][i]) +
                     ' box ' + str(output_dict[self.DETECTION_BOXES][i]))

    while self.running:
        sensation = self.getAxon().get()
        self.log("got sensation from queue " + sensation.toDebugStr())
        self.process(sensation)

    self.log("Stopping TensorFlowClassification")
    self.mode = Sensation.Mode.Stopping
    # self.camera.close()
    self.log("run ALL SHUT DOWN")
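# `self.load_image_into_numpy_array` is referenced above but not shown in this
# snippet. A common implementation for PIL images, included here as a sketch
# (an assumption about the missing helper, not the class's actual method):
def load_image_into_numpy_array(image):
    """Convert a PIL image to a (height, width, 3) uint8 numpy array."""
    (im_width, im_height) = image.size
    return np.array(image.getdata()).reshape(
        (im_height, im_width, 3)).astype(np.uint8)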
def eager_eval_loop(detection_model,
                    configs,
                    eval_dataset,
                    use_tpu=False,
                    postprocess_on_cpu=False,
                    global_step=None):
  """Evaluate the model eagerly on the evaluation dataset.

  This method will compute the evaluation metrics specified in the configs on
  the entire evaluation dataset, then return the metrics. It will also log
  the metrics to TensorBoard.

  Args:
    detection_model: A DetectionModel (based on Keras) to evaluate.
    configs: Object detection configs that specify the evaluators that should
      be used, as well as whether regularization loss should be included and
      if bfloat16 should be used on TPUs.
    eval_dataset: Dataset containing evaluation data.
    use_tpu: Whether a TPU is being used to execute the model for evaluation.
    postprocess_on_cpu: Whether model postprocessing should happen on the CPU
      when using a TPU to execute the model.
    global_step: A variable containing the training step this model was
      trained to. Used for logging purposes.

  Returns:
    A dict of evaluation metrics representing the results of this evaluation.
  """
  train_config = configs['train_config']
  eval_input_config = configs['eval_input_config']
  eval_config = configs['eval_config']
  add_regularization_loss = train_config.add_regularization_loss

  is_training = False
  detection_model._is_training = is_training  # pylint: disable=protected-access
  tf.keras.backend.set_learning_phase(is_training)

  evaluator_options = eval_util.evaluator_options_from_eval_config(
      eval_config)

  class_agnostic_category_index = (
      label_map_util.create_class_agnostic_category_index())
  class_agnostic_evaluators = eval_util.get_evaluators(
      eval_config,
      list(class_agnostic_category_index.values()),
      evaluator_options)

  class_aware_evaluators = None
  if eval_input_config.label_map_path:
    class_aware_category_index = (
        label_map_util.create_category_index_from_labelmap(
            eval_input_config.label_map_path))
    class_aware_evaluators = eval_util.get_evaluators(
        eval_config,
        list(class_aware_category_index.values()),
        evaluator_options)

  evaluators = None
  loss_metrics = {}

  @tf.function
  def compute_eval_dict(features, labels):
    """Compute the evaluation result on an image."""
    # For evaling on train data, it is necessary to check whether groundtruth
    # must be unpadded.
    boxes_shape = (
        labels[fields.InputDataFields.groundtruth_boxes].get_shape()
        .as_list())
    unpad_groundtruth_tensors = boxes_shape[1] is not None and not use_tpu
    labels = model_lib.unstack_batch(
        labels, unpad_groundtruth_tensors=unpad_groundtruth_tensors)

    losses_dict, prediction_dict = _compute_losses_and_predictions_dicts(
        detection_model, features, labels, add_regularization_loss)

    def postprocess_wrapper(args):
      return detection_model.postprocess(args[0], args[1])

    # TODO(kaftan): Depending on how postprocessing will work for TPUs w/
    # TPUStrategy, it may be good to move this wrapping to a utility method.
    if use_tpu and postprocess_on_cpu:
      detections = contrib_tpu.outside_compilation(
          postprocess_wrapper,
          (prediction_dict,
           features[fields.InputDataFields.true_image_shape]))
    else:
      detections = postprocess_wrapper(
          (prediction_dict,
           features[fields.InputDataFields.true_image_shape]))

    class_agnostic = (
        fields.DetectionResultFields.detection_classes not in detections)
    # TODO(kaftan) (or anyone): move `_prepare_groundtruth_for_eval` to
    # eval_util and call it from there.
    groundtruth = model_lib._prepare_groundtruth_for_eval(  # pylint: disable=protected-access
        detection_model, class_agnostic,
        eval_input_config.max_number_of_boxes)

    use_original_images = fields.InputDataFields.original_image in features
    if use_original_images:
      eval_images = features[fields.InputDataFields.original_image]
      true_image_shapes = tf.slice(
          features[fields.InputDataFields.true_image_shape], [0, 0], [-1, 3])
      original_image_spatial_shapes = features[
          fields.InputDataFields.original_image_spatial_shape]
    else:
      eval_images = features[fields.InputDataFields.image]
      true_image_shapes = None
      original_image_spatial_shapes = None

    eval_dict = eval_util.result_dict_for_batched_example(
        eval_images,
        features[inputs.HASH_KEY],
        detections,
        groundtruth,
        class_agnostic=class_agnostic,
        scale_to_absolute=True,
        original_image_spatial_shapes=original_image_spatial_shapes,
        true_image_shapes=true_image_shapes)

    return eval_dict, losses_dict, class_agnostic

  for i, (features, labels) in enumerate(eval_dataset):
    eval_dict, losses_dict, class_agnostic = compute_eval_dict(
        features, labels)

    if i % 100 == 0:
      tf.logging.info('Finished eval step %d', i)

    use_original_images = fields.InputDataFields.original_image in features
    if not use_tpu and use_original_images:
      # Summary for input images.
      tf.compat.v2.summary.image(
          name='eval_input_images',
          step=global_step,
          data=eval_dict['original_image'],
          max_outputs=1)
      # Summary for prediction/groundtruth side-by-side images.
      if class_agnostic:
        category_index = label_map_util.create_class_agnostic_category_index()
      else:
        category_index = label_map_util.create_category_index_from_labelmap(
            eval_input_config.label_map_path)
      keypoint_edges = [
          (kp.start, kp.end) for kp in eval_config.keypoint_edge]
      sbys_image_list = vutils.draw_side_by_side_evaluation_image(
          eval_dict,
          category_index=category_index,
          max_boxes_to_draw=eval_config.max_num_boxes_to_visualize,
          min_score_thresh=eval_config.min_score_threshold,
          use_normalized_coordinates=False,
          keypoint_edges=keypoint_edges or None)
      sbys_images = tf.concat(sbys_image_list, axis=0)
      tf.compat.v2.summary.image(
          name='eval_side_by_side',
          step=global_step,
          data=sbys_images,
          max_outputs=eval_config.num_visualizations)

    if evaluators is None:
      if class_agnostic:
        evaluators = class_agnostic_evaluators
      else:
        evaluators = class_aware_evaluators

    for evaluator in evaluators:
      evaluator.add_eval_dict(eval_dict)

    for loss_key, loss_tensor in iter(losses_dict.items()):
      if loss_key not in loss_metrics:
        loss_metrics[loss_key] = tf.keras.metrics.Mean()
      # Skip losses with a value of 0.0 or below when computing the average
      # loss: they do not reflect normal loss values and would skew the
      # average.
      if loss_tensor <= 0.0:
        continue
      loss_metrics[loss_key].update_state(loss_tensor)

  eval_metrics = {}
  for evaluator in evaluators:
    eval_metrics.update(evaluator.evaluate())
  for loss_key in loss_metrics:
    eval_metrics[loss_key] = loss_metrics[loss_key].result()

  eval_metrics = {str(k): v for k, v in eval_metrics.items()}
  tf.logging.info('Eval metrics at step %d', global_step)
  for k in eval_metrics:
    tf.compat.v2.summary.scalar(k, eval_metrics[k], step=global_step)
    tf.logging.info('\t+ %s: %f', k, eval_metrics[k])
  return eval_metrics
def detect_in_image(image_path):
    detection_graph = load_graph()
    category_index = label_map_util.create_category_index_from_labelmap(
        PATH_TO_LABELS, use_display_name=True)
    trackers = {}
    MIN_SCORE_THRESH = .5

    with detection_graph.as_default():
        with tf.Session() as sess:
            # Get handles to input and output tensors.
            image_tensor, tensor_dict = get_image_tensor()

            frame = cv2.imread(image_path)
            frame2 = np.copy(frame)
            image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            # if frame_count % detection_rate == 0 or boxes is None:
            # Run inference.
            output_dict1 = sess.run(
                tensor_dict,
                feed_dict={image_tensor: np.expand_dims(image, 0)})
            output_dict = get_output_dict(output_dict1)
            new_boxes, new_scores = get_eligible_boxes_scores(
                output_dict, MIN_SCORE_THRESH)
            # print('output_dict: ', output_dict)
            # print('\nnew_boxes: ', new_boxes)
            # print('\nnew_scores: ', new_scores)

            # Use tracker (not implemented yet):
            # if trackers.count == 0:
            #     # no tracker yet, to initialize them
            #     for n in output_dict['num_detections']:
            #         pass

            # Visualization of the results of a detection.
            # Note: perform the detections using a higher threshold.
            vis_util.visualize_boxes_and_labels_on_image_array(
                frame,
                output_dict['detection_boxes'],
                output_dict['detection_classes'],
                output_dict['detection_scores'],
                category_index,
                line_thickness=6,
                skip_labels=True,
                skip_scores=True,
                use_normalized_coordinates=True,
                min_score_thresh=MIN_SCORE_THRESH)

            vis_util.visualize_boxes_and_labels_on_image_array(
                frame2,
                new_boxes,
                output_dict['detection_classes'],
                new_scores,
                category_index,
                line_thickness=6,
                skip_labels=True,
                skip_scores=True,
                use_normalized_coordinates=True,
                min_score_thresh=MIN_SCORE_THRESH)

            cv2.imwrite("D:\\temp\\frame1.jpg", frame)
            cv2.imwrite("D:\\temp\\frame2.jpg", frame2)
            print('image saved..')

    cv2.destroyAllWindows()
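# `get_eligible_boxes_scores` is called above but not defined in this snippet.
# A minimal sketch of what such a helper could look like, assuming the usual
# TF Object Detection API output layout (parallel numpy arrays of boxes and
# scores); the name and signature are taken from the call site, the body is
# an assumption:
def get_eligible_boxes_scores(output_dict, min_score_thresh):
    """Keep only the boxes whose detection score clears the threshold."""
    keep = output_dict['detection_scores'] >= min_score_thresh
    return (output_dict['detection_boxes'][keep],
            output_dict['detection_scores'][keep])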
def model_fn(features, labels, mode, params=None):
  """Constructs the object detection model.

  Args:
    features: Dictionary of feature tensors, returned from `input_fn`.
    labels: Dictionary of groundtruth tensors if mode is TRAIN or EVAL,
      otherwise None.
    mode: Mode key from tf.estimator.ModeKeys.
    params: Parameter dictionary passed from the estimator.

  Returns:
    An `EstimatorSpec` that encapsulates the model and its serving
      configurations.
  """
  params = params or {}
  total_loss, train_op, detections, export_outputs = None, None, None, None
  is_training = mode == tf.estimator.ModeKeys.TRAIN

  # Make sure to set the Keras learning phase. True during training,
  # False for inference.
  tf.keras.backend.set_learning_phase(is_training)
  detection_model = detection_model_fn(
      is_training=is_training, add_summaries=(not use_tpu))
  scaffold_fn = None

  if mode == tf.estimator.ModeKeys.TRAIN:
    labels = unstack_batch(
        labels,
        unpad_groundtruth_tensors=train_config.unpad_groundtruth_tensors)
  elif mode == tf.estimator.ModeKeys.EVAL:
    # For evaling on train data, it is necessary to check whether groundtruth
    # must be unpadded.
    boxes_shape = (
        labels[fields.InputDataFields.groundtruth_boxes].get_shape()
        .as_list())
    unpad_groundtruth_tensors = boxes_shape[1] is not None and not use_tpu
    labels = unstack_batch(
        labels, unpad_groundtruth_tensors=unpad_groundtruth_tensors)

  if mode in (tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL):
    gt_boxes_list = labels[fields.InputDataFields.groundtruth_boxes]
    gt_classes_list = labels[fields.InputDataFields.groundtruth_classes]
    gt_masks_list = None
    if fields.InputDataFields.groundtruth_instance_masks in labels:
      gt_masks_list = labels[
          fields.InputDataFields.groundtruth_instance_masks]
    gt_keypoints_list = None
    if fields.InputDataFields.groundtruth_keypoints in labels:
      gt_keypoints_list = labels[
          fields.InputDataFields.groundtruth_keypoints]
    gt_weights_list = None
    if fields.InputDataFields.groundtruth_weights in labels:
      gt_weights_list = labels[fields.InputDataFields.groundtruth_weights]
    gt_confidences_list = None
    if fields.InputDataFields.groundtruth_confidences in labels:
      gt_confidences_list = labels[
          fields.InputDataFields.groundtruth_confidences]
    gt_is_crowd_list = None
    if fields.InputDataFields.groundtruth_is_crowd in labels:
      gt_is_crowd_list = labels[fields.InputDataFields.groundtruth_is_crowd]
    detection_model.provide_groundtruth(
        groundtruth_boxes_list=gt_boxes_list,
        groundtruth_classes_list=gt_classes_list,
        groundtruth_confidences_list=gt_confidences_list,
        groundtruth_masks_list=gt_masks_list,
        groundtruth_keypoints_list=gt_keypoints_list,
        groundtruth_weights_list=gt_weights_list,
        groundtruth_is_crowd_list=gt_is_crowd_list)

  preprocessed_images = features[fields.InputDataFields.image]
  if use_tpu and train_config.use_bfloat16:
    with tf.contrib.tpu.bfloat16_scope():
      prediction_dict = detection_model.predict(
          preprocessed_images,
          features[fields.InputDataFields.true_image_shape])
      for k, v in prediction_dict.items():
        if v.dtype == tf.bfloat16:
          prediction_dict[k] = tf.cast(v, tf.float32)
  else:
    prediction_dict = detection_model.predict(
        preprocessed_images,
        features[fields.InputDataFields.true_image_shape])

  def postprocess_wrapper(args):
    return detection_model.postprocess(args[0], args[1])

  if mode in (tf.estimator.ModeKeys.EVAL, tf.estimator.ModeKeys.PREDICT):
    if use_tpu and postprocess_on_cpu:
      detections = tf.contrib.tpu.outside_compilation(
          postprocess_wrapper,
          (prediction_dict,
           features[fields.InputDataFields.true_image_shape]))
    else:
      detections = postprocess_wrapper(
          (prediction_dict,
           features[fields.InputDataFields.true_image_shape]))

  if mode == tf.estimator.ModeKeys.TRAIN:
    if train_config.fine_tune_checkpoint and hparams.load_pretrained:
      if not train_config.fine_tune_checkpoint_type:
        # train_config.from_detection_checkpoint field is deprecated. For
        # backward compatibility, set train_config.fine_tune_checkpoint_type
        # based on train_config.from_detection_checkpoint.
        if train_config.from_detection_checkpoint:
          train_config.fine_tune_checkpoint_type = 'detection'
        else:
          train_config.fine_tune_checkpoint_type = 'classification'
      asg_map = detection_model.restore_map(
          fine_tune_checkpoint_type=train_config.fine_tune_checkpoint_type,
          load_all_detection_checkpoint_vars=(
              train_config.load_all_detection_checkpoint_vars))
      available_var_map = (
          variables_helper.get_variables_available_in_checkpoint(
              asg_map,
              train_config.fine_tune_checkpoint,
              include_global_step=False))
      if use_tpu:

        def tpu_scaffold():
          tf.train.init_from_checkpoint(train_config.fine_tune_checkpoint,
                                        available_var_map)
          return tf.train.Scaffold()

        scaffold_fn = tpu_scaffold
      else:
        tf.train.init_from_checkpoint(train_config.fine_tune_checkpoint,
                                      available_var_map)

  if mode in (tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL):
    losses_dict = detection_model.loss(
        prediction_dict, features[fields.InputDataFields.true_image_shape])
    losses = [loss_tensor for loss_tensor in losses_dict.values()]
    if train_config.add_regularization_loss:
      regularization_losses = detection_model.regularization_losses()
      if regularization_losses:
        regularization_loss = tf.add_n(
            regularization_losses, name='regularization_loss')
        losses.append(regularization_loss)
        losses_dict['Loss/regularization_loss'] = regularization_loss
    total_loss = tf.add_n(losses, name='total_loss')
    losses_dict['Loss/total_loss'] = total_loss

    if 'graph_rewriter_config' in configs:
      graph_rewriter_fn = graph_rewriter_builder.build(
          configs['graph_rewriter_config'], is_training=is_training)
      graph_rewriter_fn()

    # TODO(rathodv): Stop creating optimizer summary vars in EVAL mode once we
    # can write learning rate summaries on TPU without host calls.
    global_step = tf.train.get_or_create_global_step()
    training_optimizer, optimizer_summary_vars = optimizer_builder.build(
        train_config.optimizer)

  if mode == tf.estimator.ModeKeys.TRAIN:
    if use_tpu:
      training_optimizer = tf.contrib.tpu.CrossShardOptimizer(
          training_optimizer)

    # ADDED for multi-GPU: wrap the optimizer with Horovod's distributed
    # optimizer so gradients are averaged across workers.
    training_optimizer = hvd.DistributedOptimizer(
        training_optimizer, device_dense='/cpu:0')

    # Optionally freeze some layers by setting their gradients to be zero.
    trainable_variables = None
    include_variables = (
        train_config.update_trainable_variables
        if train_config.update_trainable_variables else None)
    exclude_variables = (
        train_config.freeze_variables
        if train_config.freeze_variables else None)
    trainable_variables = tf.contrib.framework.filter_variables(
        tf.trainable_variables(),
        include_patterns=include_variables,
        exclude_patterns=exclude_variables)

    clip_gradients_value = None
    if train_config.gradient_clipping_by_norm > 0:
      clip_gradients_value = train_config.gradient_clipping_by_norm

    if not use_tpu:
      for var in optimizer_summary_vars:
        tf.summary.scalar(var.op.name, var)
    summaries = [] if use_tpu else None
    if train_config.summarize_gradients:
      summaries = ['gradients', 'gradient_norm', 'global_gradient_norm']
    train_op = tf.contrib.layers.optimize_loss(
        loss=total_loss,
        global_step=global_step,
        learning_rate=None,
        clip_gradients=clip_gradients_value,
        optimizer=training_optimizer,
        update_ops=detection_model.updates(),
        variables=trainable_variables,
        summaries=summaries,
        name='')  # Preventing scope prefix on all variables.

  if mode == tf.estimator.ModeKeys.PREDICT:
    exported_output = exporter_lib.add_output_tensor_nodes(detections)
    export_outputs = {
        tf.saved_model.signature_constants.PREDICT_METHOD_NAME:
            tf.estimator.export.PredictOutput(exported_output)
    }

  eval_metric_ops = None
  scaffold = None
  if mode == tf.estimator.ModeKeys.EVAL:
    class_agnostic = (
        fields.DetectionResultFields.detection_classes not in detections)
    groundtruth = _prepare_groundtruth_for_eval(
        detection_model, class_agnostic,
        eval_input_config.max_number_of_boxes)
    use_original_images = fields.InputDataFields.original_image in features
    if use_original_images:
      eval_images = features[fields.InputDataFields.original_image]
      true_image_shapes = tf.slice(
          features[fields.InputDataFields.true_image_shape], [0, 0], [-1, 3])
      original_image_spatial_shapes = features[
          fields.InputDataFields.original_image_spatial_shape]
    else:
      eval_images = features[fields.InputDataFields.image]
      true_image_shapes = None
      original_image_spatial_shapes = None

    eval_dict = eval_util.result_dict_for_batched_example(
        eval_images,
        features[inputs.HASH_KEY],
        detections,
        groundtruth,
        class_agnostic=class_agnostic,
        scale_to_absolute=True,
        original_image_spatial_shapes=original_image_spatial_shapes,
        true_image_shapes=true_image_shapes)

    if class_agnostic:
      category_index = label_map_util.create_class_agnostic_category_index()
    else:
      category_index = label_map_util.create_category_index_from_labelmap(
          eval_input_config.label_map_path)
    vis_metric_ops = None
    if not use_tpu and use_original_images:
      eval_metric_op_vis = vis_utils.VisualizeSingleFrameDetections(
          category_index,
          max_examples_to_draw=eval_config.num_visualizations,
          max_boxes_to_draw=eval_config.max_num_boxes_to_visualize,
          min_score_thresh=eval_config.min_score_threshold,
          use_normalized_coordinates=False)
      vis_metric_ops = eval_metric_op_vis.get_estimator_eval_metric_ops(
          eval_dict)

    # Eval metrics on a single example.
    eval_metric_ops = eval_util.get_eval_metric_ops_for_evaluators(
        eval_config, list(category_index.values()), eval_dict)
    for loss_key, loss_tensor in iter(losses_dict.items()):
      eval_metric_ops[loss_key] = tf.metrics.mean(loss_tensor)
    for var in optimizer_summary_vars:
      eval_metric_ops[var.op.name] = (var, tf.no_op())
    if vis_metric_ops is not None:
      eval_metric_ops.update(vis_metric_ops)
    eval_metric_ops = {str(k): v for k, v in eval_metric_ops.items()}

    if eval_config.use_moving_averages:
      variable_averages = tf.train.ExponentialMovingAverage(0.0)
      variables_to_restore = variable_averages.variables_to_restore()
      keep_checkpoint_every_n_hours = (
          train_config.keep_checkpoint_every_n_hours)
      saver = tf.train.Saver(
          variables_to_restore,
          keep_checkpoint_every_n_hours=keep_checkpoint_every_n_hours)
      scaffold = tf.train.Scaffold(saver=saver)

  # EVAL executes on CPU, so use regular non-TPU EstimatorSpec.
  if use_tpu and mode != tf.estimator.ModeKeys.EVAL:
    return tf.contrib.tpu.TPUEstimatorSpec(
        mode=mode,
        scaffold_fn=scaffold_fn,
        predictions=detections,
        loss=total_loss,
        train_op=train_op,
        eval_metrics=eval_metric_ops,
        export_outputs=export_outputs)
  else:
    if scaffold is None:
      keep_checkpoint_every_n_hours = (
          train_config.keep_checkpoint_every_n_hours)
      saver = tf.train.Saver(
          sharded=True,
          keep_checkpoint_every_n_hours=keep_checkpoint_every_n_hours,
          save_relative_paths=True)
      tf.add_to_collection(tf.GraphKeys.SAVERS, saver)
      scaffold = tf.train.Scaffold(saver=saver)
    return tf.estimator.EstimatorSpec(
        mode=mode,
        predictions=detections,
        loss=total_loss,
        train_op=train_op,
        eval_metric_ops=eval_metric_ops,
        export_outputs=export_outputs,
        scaffold=scaffold)
def final_pipeline1(test_img_path, detect_fn):
    utils_ops.tf = tf.compat.v1
    tf.gfile = tf.io.gfile
    PATH_TO_LABELS = '/content/drive/MyDrive/label_map.pbtxt'
    category_index = label_map_util.create_category_index_from_labelmap(
        PATH_TO_LABELS, use_display_name=True)

    print('Running inference... ')
    image_np = resize_image(test_img_path)

    # Things to try:
    # Flip horizontally
    # image_np = np.fliplr(image_np).copy()
    # Convert image to grayscale
    # image_np = np.tile(
    #     np.mean(image_np, 2, keepdims=True), (1, 1, 3)).astype(np.uint8)

    # The input needs to be a tensor, convert it using `tf.convert_to_tensor`.
    input_tensor = tf.convert_to_tensor(image_np)
    # The model expects a batch of images, so add an axis with `tf.newaxis`.
    input_tensor = input_tensor[tf.newaxis, ...]
    # input_tensor = np.expand_dims(image_np, 0)

    detections = detect_fn(input_tensor)

    # All outputs are batch tensors. Convert them to numpy arrays and take
    # index [0] to remove the batch dimension. We're only interested in the
    # first num_detections.
    num_detections = int(detections.pop('num_detections'))
    detections = {
        key: value[0, :num_detections].numpy()
        for key, value in detections.items()
    }
    detections['num_detections'] = num_detections
    # detection_classes should be ints.
    detections['detection_classes'] = detections['detection_classes'].astype(
        np.int64)

    image_np_with_detections = image_np.copy()
    viz_utils.visualize_boxes_and_labels_on_image_array(
        image_np_with_detections,
        detections['detection_boxes'],
        detections['detection_classes'],
        detections['detection_scores'],
        category_index,
        use_normalized_coordinates=True,
        max_boxes_to_draw=200,
        min_score_thresh=.29,
        agnostic_mode=False)

    st.text('Damage Detected Image : ')
    st.image(image_np_with_detections, caption='Detected Image')
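# `resize_image` is called above but not defined in this snippet. A minimal
# sketch, assuming it reads the image from disk and returns an RGB uint8
# numpy array resized to a fixed edge length; the target size used by the
# author is unknown, 640 is just a common SSD/EfficientDet input size:
def resize_image(image_path, target_size=640):
    img = cv2.imread(image_path)                # BGR uint8
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # the model expects RGB
    img = cv2.resize(img, (target_size, target_size))
    return np.asarray(img, dtype=np.uint8)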
def __init__(self, model_name, path_to_labels):
    self.model = self.load_model(model_name)
    self.category_index = label_map_util.create_category_index_from_labelmap(
        path_to_labels, use_display_name=True)
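# Hypothetical usage of the class above (the class name and the paths are
# placeholders; the snippet does not show them):
# detector = Detector('ssd_mobilenet_v2_320x320_coco17_tpu-8',
#                     'data/mscoco_label_map.pbtxt')
# detector.category_index[1]  # -> {'id': 1, 'name': 'person'}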
def structured_output(MODEL_NAME, FROZEN_GRAPH, LABELS, image_path):
    PATH_TO_FROZEN_GRAPH = os.path.join('parameters', MODEL_NAME,
                                        FROZEN_GRAPH)
    PATH_TO_LABELS = os.path.join('parameters', 'data', LABELS)

    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(PATH_TO_FROZEN_GRAPH, 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name='')

    category_index = label_map_util.create_category_index_from_labelmap(
        PATH_TO_LABELS, use_display_name=True)

    image = Image.open(image_path)
    image_np = load_image_into_numpy_array(image)
    output_dict = run_inference_for_single_image(image_np, detection_graph)

    d = {}
    box_ctr = 0
    box_count = output_dict['num_detections']
    if box_count == 0:
        return {}
    curr_class = int(output_dict['detection_classes'][box_ctr])
    class_num = 0
    while box_ctr < box_count:
        obj_score = output_dict['detection_scores'][box_ctr]
        xmin = output_dict['detection_boxes'][box_ctr][1]
        xmax = output_dict['detection_boxes'][box_ctr][3]
        ymin = output_dict['detection_boxes'][box_ctr][0]
        ymax = output_dict['detection_boxes'][box_ctr][2]
        obj_class = output_dict['detection_classes'][box_ctr]
        if int(curr_class) != int(obj_class):
            curr_class = int(obj_class)
            class_num = 1
        else:
            class_num = class_num + 1
        obj = {
            'P-Score': obj_score,
            'box': {'xmin': xmin, 'xmax': xmax, 'ymin': ymin, 'ymax': ymax}
        }
        name = 'obj-' + str(curr_class) + '.0-' + str(class_num)
        dd = {name: obj}
        class_name = str(curr_class) + '.0'
        # `d` is keyed by `class_name` (a string), so the membership test must
        # use `class_name`, not the int `curr_class`.
        if class_name not in d:
            d[class_name] = dd
        else:
            d[class_name].update(dd)
        box_ctr = box_ctr + 1
    return d
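# `run_inference_for_single_image` is referenced above (and again below) but
# not defined in these snippets. A sketch of the standard TF1 version that
# fetches the frozen graph's output tensors by name (mask handling omitted);
# the tensor names below are the TF Object Detection API's exported defaults:
def run_inference_for_single_image(image, graph):
    with graph.as_default():
        with tf.Session() as sess:
            # Collect the output tensors that exist in this graph.
            ops = tf.get_default_graph().get_operations()
            all_tensor_names = {
                output.name for op in ops for output in op.outputs}
            tensor_dict = {}
            for key in ['num_detections', 'detection_boxes',
                        'detection_scores', 'detection_classes']:
                tensor_name = key + ':0'
                if tensor_name in all_tensor_names:
                    tensor_dict[key] = (
                        tf.get_default_graph().get_tensor_by_name(
                            tensor_name))
            image_tensor = tf.get_default_graph().get_tensor_by_name(
                'image_tensor:0')
            # Run inference on a batch of one image.
            output_dict = sess.run(
                tensor_dict,
                feed_dict={image_tensor: np.expand_dims(image, 0)})
            # Outputs are batched float32 arrays; strip the batch dimension.
            output_dict['num_detections'] = int(
                output_dict['num_detections'][0])
            output_dict['detection_classes'] = (
                output_dict['detection_classes'][0].astype(np.uint8))
            output_dict['detection_boxes'] = output_dict['detection_boxes'][0]
            output_dict['detection_scores'] = (
                output_dict['detection_scores'][0])
    return output_dict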
def load_detection_label_map(label_map_path):
    label_map = label_map_util.create_category_index_from_labelmap(
        label_map_path)
    return label_map
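# Example of the returned category index for the label map used in the test
# at the top of this file (ids map to {'id', 'name'} dicts):
# load_detection_label_map('label_map.pbtxt')
# -> {1: {'id': 1, 'name': 'dog'}, 2: {'id': 2, 'name': 'cat'}}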
def tensorflow_detection(MODEL_NAME, FROZEN_GRAPH, LABELS, info):
    # Faster R-CNN Model
    # MODEL_NAME = 'faster_rcnn_resnet50_coco_2018_01_28'

    # Path to frozen detection graph. This is the actual model that is used
    # for the object detection.
    PATH_TO_FROZEN_GRAPH = os.path.join('parameters', MODEL_NAME,
                                        FROZEN_GRAPH)
    # List of the strings that are used to add the correct label to each box.
    PATH_TO_LABELS = os.path.join('parameters', 'data', LABELS)

    # Load the model into memory.
    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(PATH_TO_FROZEN_GRAPH, 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name='')

    # Label map.
    category_index = label_map_util.create_category_index_from_labelmap(
        PATH_TO_LABELS, use_display_name=True)

    images = info['images']
    dummy_text = info['dummy_text']
    directory = info['directory']
    TEST_IMAGE_PATHS = image_list(images, dummy_text, directory)
    if not TEST_IMAGE_PATHS:
        return pd.DataFrame()

    # Size, in inches, of the output images (used by the disabled plotting
    # code below).
    IMAGE_SIZE = (12, 8)

    data = []
    count = 1
    for image_path in TEST_IMAGE_PATHS:
        print("image" + str(count) + " processing")
        print(str(float(100 * count) / images) + "%")
        image = Image.open(image_path)
        # The array-based representation of the image will be used later to
        # prepare the result image with boxes and labels on it.
        image_np = load_image_into_numpy_array(image)
        # Expand dimensions since the model expects images to have shape:
        # [1, None, None, 3]
        image_np_expanded = np.expand_dims(image_np, axis=0)
        # Actual detection.
        output_dict = run_inference_for_single_image(image_np,
                                                     detection_graph)
        # Visualization of the results of a detection (disabled):
        # vis_util.visualize_boxes_and_labels_on_image_array(
        #     image_np,
        #     output_dict['detection_boxes'],
        #     output_dict['detection_classes'],
        #     output_dict['detection_scores'],
        #     category_index,
        #     instance_masks=output_dict.get('detection_masks'),
        #     use_normalized_coordinates=True,
        #     line_thickness=8)
        # plt.figure(figsize=IMAGE_SIZE)
        # plt.imshow(image_np)
        # print(output_dict)
        data = update_data(output_dict, data, image_path)
        count = count + 1

    print('')
    cols = ['Image', 'NumDetected', 'Class', 'Score',
            'xmin', 'xmax', 'ymin', 'ymax']
    df = pd.DataFrame(data, columns=cols)
    return df
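# `update_data` is called above but defined elsewhere. A sketch consistent
# with the DataFrame columns built at the end of `tensorflow_detection`
# (Image, NumDetected, Class, Score, xmin, xmax, ymin, ymax); this is an
# assumption, not the author's implementation:
def update_data(output_dict, data, image_path):
    num = int(output_dict['num_detections'])
    for i in range(num):
        # Boxes come back as [ymin, xmin, ymax, xmax] in normalized
        # coordinates.
        ymin, xmin, ymax, xmax = output_dict['detection_boxes'][i]
        data.append([image_path, num,
                     int(output_dict['detection_classes'][i]),
                     float(output_dict['detection_scores'][i]),
                     xmin, xmax, ymin, ymax])
    return data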
# Load model
ROOT = '/home/JulioCesar/flores/MIA2/landmark_detection'
PATH_TO_SAVED_MODEL = os.path.join(ROOT, 'exported', 'train_2', 'ckpt-6',
                                   'saved_model')
PATH_TO_LABELS = os.path.join(ROOT, 'landmarks_label_map.pbtxt')
print('Model path {}'.format(PATH_TO_SAVED_MODEL))
print('Label path {}'.format(PATH_TO_LABELS))

# Load saved model and build the detection function
detect_fn = tf.saved_model.load(PATH_TO_SAVED_MODEL)
print('Model loaded')

# Load label map
category_index = label_map_util.create_category_index_from_labelmap(
    PATH_TO_LABELS)
print('Category index', category_index)


# Run detections
def load_image_into_numpy_array(path):
    """Load an image from file into a numpy array.

    Puts image into numpy array to feed into tensorflow graph. Note that by
    convention we put it into a numpy array with shape (height, width,
    channels), where channels=3 for RGB.

    Args:
        path: the file path to the image

    Returns:
        uint8 numpy array with shape (img_height, img_width, 3)
    """
    return np.array(Image.open(path))
def model_fn(features, labels, mode, params=None):
  """Constructs the object detection model.

  Args:
    features: Dictionary of feature tensors, returned from `input_fn`.
    labels: Dictionary of groundtruth tensors if mode is TRAIN or EVAL,
      otherwise None.
    mode: Mode key from tf.estimator.ModeKeys.
    params: Parameter dictionary passed from the estimator.

  Returns:
    An `EstimatorSpec` that encapsulates the model and its serving
      configurations.
  """
  params = params or {}
  total_loss, train_op, detections, export_outputs = None, None, None, None
  is_training = mode == tf.estimator.ModeKeys.TRAIN
  detection_model = detection_model_fn(
      is_training=is_training, add_summaries=(not use_tpu))
  scaffold_fn = None

  if mode == tf.estimator.ModeKeys.TRAIN:
    labels = unstack_batch(
        labels,
        unpad_groundtruth_tensors=train_config.unpad_groundtruth_tensors)
  elif mode == tf.estimator.ModeKeys.EVAL:
    labels = unstack_batch(labels, unpad_groundtruth_tensors=False)

  if mode in (tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL):
    gt_boxes_list = labels[fields.InputDataFields.groundtruth_boxes]
    gt_classes_list = labels[fields.InputDataFields.groundtruth_classes]
    gt_masks_list = None
    if fields.InputDataFields.groundtruth_instance_masks in labels:
      gt_masks_list = labels[
          fields.InputDataFields.groundtruth_instance_masks]
    gt_keypoints_list = None
    if fields.InputDataFields.groundtruth_keypoints in labels:
      gt_keypoints_list = labels[
          fields.InputDataFields.groundtruth_keypoints]
    detection_model.provide_groundtruth(
        groundtruth_boxes_list=gt_boxes_list,
        groundtruth_classes_list=gt_classes_list,
        groundtruth_masks_list=gt_masks_list,
        groundtruth_keypoints_list=gt_keypoints_list)

  preprocessed_images = features[fields.InputDataFields.image]
  prediction_dict = detection_model.predict(
      preprocessed_images, features[fields.InputDataFields.true_image_shape])
  detections = detection_model.postprocess(
      prediction_dict, features[fields.InputDataFields.true_image_shape])

  if mode == tf.estimator.ModeKeys.TRAIN:
    if train_config.fine_tune_checkpoint and hparams.load_pretrained:
      if not train_config.fine_tune_checkpoint_type:
        # train_config.from_detection_checkpoint field is deprecated. For
        # backward compatibility, set train_config.fine_tune_checkpoint_type
        # based on train_config.from_detection_checkpoint.
        if train_config.from_detection_checkpoint:
          train_config.fine_tune_checkpoint_type = 'detection'
        else:
          train_config.fine_tune_checkpoint_type = 'classification'
      asg_map = detection_model.restore_map(
          fine_tune_checkpoint_type=train_config.fine_tune_checkpoint_type,
          load_all_detection_checkpoint_vars=(
              train_config.load_all_detection_checkpoint_vars))
      available_var_map = (
          variables_helper.get_variables_available_in_checkpoint(
              asg_map,
              train_config.fine_tune_checkpoint,
              include_global_step=False))
      if use_tpu:

        def tpu_scaffold():
          tf.train.init_from_checkpoint(train_config.fine_tune_checkpoint,
                                        available_var_map)
          return tf.train.Scaffold()

        scaffold_fn = tpu_scaffold
      else:
        tf.train.init_from_checkpoint(train_config.fine_tune_checkpoint,
                                      available_var_map)

  if mode in (tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL):
    losses_dict = detection_model.loss(
        prediction_dict, features[fields.InputDataFields.true_image_shape])
    # Python 3: dict has no itervalues(); use values().
    losses = [loss_tensor for loss_tensor in losses_dict.values()]
    if train_config.add_regularization_loss:
      regularization_losses = tf.get_collection(
          tf.GraphKeys.REGULARIZATION_LOSSES)
      if regularization_losses:
        regularization_loss = tf.add_n(
            regularization_losses, name='regularization_loss')
        losses.append(regularization_loss)
        if not use_tpu:
          tf.summary.scalar('regularization_loss', regularization_loss)
    total_loss = tf.add_n(losses, name='total_loss')

  if mode == tf.estimator.ModeKeys.TRAIN:
    global_step = tf.train.get_or_create_global_step()
    training_optimizer, optimizer_summary_vars = optimizer_builder.build(
        train_config.optimizer)

    if use_tpu:
      training_optimizer = tpu_optimizer.CrossShardOptimizer(
          training_optimizer)

    # Optionally freeze some layers by setting their gradients to be zero.
    trainable_variables = None
    if train_config.freeze_variables:
      trainable_variables = tf.contrib.framework.filter_variables(
          tf.trainable_variables(),
          exclude_patterns=train_config.freeze_variables)

    clip_gradients_value = None
    if train_config.gradient_clipping_by_norm > 0:
      clip_gradients_value = train_config.gradient_clipping_by_norm

    if not use_tpu:
      for var in optimizer_summary_vars:
        tf.summary.scalar(var.op.name, var)
    summaries = [] if use_tpu else None
    train_op = tf.contrib.layers.optimize_loss(
        loss=total_loss,
        global_step=global_step,
        learning_rate=None,
        clip_gradients=clip_gradients_value,
        optimizer=training_optimizer,
        variables=trainable_variables,
        summaries=summaries,
        name='')  # Preventing scope prefix on all variables.

  if mode == tf.estimator.ModeKeys.PREDICT:
    export_outputs = {
        tf.saved_model.signature_constants.PREDICT_METHOD_NAME:
            tf.estimator.export.PredictOutput(detections)
    }

  eval_metric_ops = None
  if mode == tf.estimator.ModeKeys.EVAL:
    # Detection summaries during eval.
    class_agnostic = (
        fields.DetectionResultFields.detection_classes not in detections)
    groundtruth = _get_groundtruth_data(detection_model, class_agnostic)
    use_original_images = fields.InputDataFields.original_image in features
    eval_images = (
        features[fields.InputDataFields.original_image]
        if use_original_images else features[fields.InputDataFields.image])
    eval_dict = eval_util.result_dict_for_single_example(
        eval_images[0:1],
        features[inputs.HASH_KEY][0],
        detections,
        groundtruth,
        class_agnostic=class_agnostic,
        scale_to_absolute=False)

    if class_agnostic:
      category_index = label_map_util.create_class_agnostic_category_index()
    else:
      category_index = label_map_util.create_category_index_from_labelmap(
          eval_input_config.label_map_path)

    if not use_tpu and use_original_images:
      detection_and_groundtruth = (
          vis_utils.draw_side_by_side_evaluation_image(
              eval_dict, category_index, max_boxes_to_draw=20,
              min_score_thresh=0.2))
      tf.summary.image('Detections_Left_Groundtruth_Right',
                       detection_and_groundtruth)

    # Eval metrics on a single image.
    eval_metrics = eval_config.metrics_set
    if not eval_metrics:
      eval_metrics = ['coco_detection_metrics']
    eval_metric_ops = eval_util.get_eval_metric_ops_for_evaluators(
        eval_metrics,
        list(category_index.values()),
        eval_dict,
        include_metrics_per_category=False)

  if use_tpu:
    return tf.contrib.tpu.TPUEstimatorSpec(
        mode=mode,
        scaffold_fn=scaffold_fn,
        predictions=detections,
        loss=total_loss,
        train_op=train_op,
        eval_metrics=eval_metric_ops,
        export_outputs=export_outputs)
  else:
    return tf.estimator.EstimatorSpec(
        mode=mode,
        predictions=detections,
        loss=total_loss,
        train_op=train_op,
        eval_metric_ops=eval_metric_ops,
        export_outputs=export_outputs)
def model_fn(features, labels, mode, params=None):
  """Constructs the object detection model.

  Args:
    features: Dictionary of feature tensors, returned from `input_fn`.
    labels: Dictionary of groundtruth tensors if mode is TRAIN or EVAL,
      otherwise None.
    mode: Mode key from tf.estimator.ModeKeys.
    params: Parameter dictionary passed from the estimator.

  Returns:
    An `EstimatorSpec` that encapsulates the model and its serving
      configurations.
  """
  params = params or {}
  total_loss, train_op, detections, export_outputs = None, None, None, None
  is_training = mode == tf.estimator.ModeKeys.TRAIN
  detection_model = detection_model_fn(
      is_training=is_training, add_summaries=(not use_tpu))
  scaffold_fn = None

  if mode == tf.estimator.ModeKeys.TRAIN:
    labels = unstack_batch(
        labels,
        unpad_groundtruth_tensors=train_config.unpad_groundtruth_tensors)
  elif mode == tf.estimator.ModeKeys.EVAL:
    # For evaling on train data, it is necessary to check whether groundtruth
    # must be unpadded.
    boxes_shape = (
        labels[fields.InputDataFields.groundtruth_boxes].get_shape()
        .as_list())
    unpad_groundtruth_tensors = boxes_shape[1] is not None
    labels = unstack_batch(
        labels, unpad_groundtruth_tensors=unpad_groundtruth_tensors)

  if mode in (tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL):
    gt_boxes_list = labels[fields.InputDataFields.groundtruth_boxes]
    gt_classes_list = labels[fields.InputDataFields.groundtruth_classes]
    gt_masks_list = None
    if fields.InputDataFields.groundtruth_instance_masks in labels:
      gt_masks_list = labels[
          fields.InputDataFields.groundtruth_instance_masks]
    gt_keypoints_list = None
    if fields.InputDataFields.groundtruth_keypoints in labels:
      gt_keypoints_list = labels[
          fields.InputDataFields.groundtruth_keypoints]
    # Initialize to None so the provide_groundtruth call below does not fail
    # with a NameError when is_crowd annotations are absent from the input.
    gt_is_crowd_list = None
    if fields.InputDataFields.groundtruth_is_crowd in labels:
      gt_is_crowd_list = labels[fields.InputDataFields.groundtruth_is_crowd]
    detection_model.provide_groundtruth(
        groundtruth_boxes_list=gt_boxes_list,
        groundtruth_classes_list=gt_classes_list,
        groundtruth_masks_list=gt_masks_list,
        groundtruth_keypoints_list=gt_keypoints_list,
        groundtruth_weights_list=labels[
            fields.InputDataFields.groundtruth_weights],
        groundtruth_is_crowd_list=gt_is_crowd_list)

  preprocessed_images = features[fields.InputDataFields.image]
  prediction_dict = detection_model.predict(
      preprocessed_images, features[fields.InputDataFields.true_image_shape])
  detections = detection_model.postprocess(
      prediction_dict, features[fields.InputDataFields.true_image_shape])

  if mode == tf.estimator.ModeKeys.TRAIN:
    if train_config.fine_tune_checkpoint and hparams.load_pretrained:
      if not train_config.fine_tune_checkpoint_type:
        # train_config.from_detection_checkpoint field is deprecated. For
        # backward compatibility, set train_config.fine_tune_checkpoint_type
        # based on train_config.from_detection_checkpoint.
        if train_config.from_detection_checkpoint:
          train_config.fine_tune_checkpoint_type = 'detection'
        else:
          train_config.fine_tune_checkpoint_type = 'classification'
      asg_map = detection_model.restore_map(
          fine_tune_checkpoint_type=train_config.fine_tune_checkpoint_type,
          load_all_detection_checkpoint_vars=(
              train_config.load_all_detection_checkpoint_vars))
      available_var_map = (
          variables_helper.get_variables_available_in_checkpoint(
              asg_map,
              train_config.fine_tune_checkpoint,
              include_global_step=False))
      if use_tpu:

        def tpu_scaffold():
          tf.train.init_from_checkpoint(train_config.fine_tune_checkpoint,
                                        available_var_map)
          return tf.train.Scaffold()

        scaffold_fn = tpu_scaffold
      else:
        tf.train.init_from_checkpoint(train_config.fine_tune_checkpoint,
                                      available_var_map)

  if mode in (tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL):
    losses_dict = detection_model.loss(
        prediction_dict, features[fields.InputDataFields.true_image_shape])
    # Python 3: dict has no itervalues(); use values().
    losses = [loss_tensor for loss_tensor in losses_dict.values()]
    if train_config.add_regularization_loss:
      regularization_losses = tf.get_collection(
          tf.GraphKeys.REGULARIZATION_LOSSES)
      if regularization_losses:
        regularization_loss = tf.add_n(
            regularization_losses, name='regularization_loss')
        losses.append(regularization_loss)
        losses_dict['Loss/regularization_loss'] = regularization_loss
    total_loss = tf.add_n(losses, name='total_loss')
    losses_dict['Loss/total_loss'] = total_loss

    if 'graph_rewriter_config' in configs:
      graph_rewriter_fn = graph_rewriter_builder.build(
          configs['graph_rewriter_config'], is_training=is_training)
      graph_rewriter_fn()

    # TODO(rathodv): Stop creating optimizer summary vars in EVAL mode once we
    # can write learning rate summaries on TPU without host calls.
    global_step = tf.train.get_or_create_global_step()
    training_optimizer, optimizer_summary_vars = optimizer_builder.build(
        train_config.optimizer)

  if mode == tf.estimator.ModeKeys.TRAIN:
    if use_tpu:
      training_optimizer = tf.contrib.tpu.CrossShardOptimizer(
          training_optimizer)

    # Optionally freeze some layers by setting their gradients to be zero.
    trainable_variables = None
    if train_config.freeze_variables:
      trainable_variables = tf.contrib.framework.filter_variables(
          tf.trainable_variables(),
          exclude_patterns=train_config.freeze_variables)

    clip_gradients_value = None
    if train_config.gradient_clipping_by_norm > 0:
      clip_gradients_value = train_config.gradient_clipping_by_norm

    if not use_tpu:
      for var in optimizer_summary_vars:
        tf.summary.scalar(var.op.name, var)
    summaries = [] if use_tpu else None
    train_op = tf.contrib.layers.optimize_loss(
        loss=total_loss,
        global_step=global_step,
        learning_rate=None,
        clip_gradients=clip_gradients_value,
        optimizer=training_optimizer,
        variables=trainable_variables,
        summaries=summaries,
        name='')  # Preventing scope prefix on all variables.

  if mode == tf.estimator.ModeKeys.PREDICT:
    export_outputs = {
        tf.saved_model.signature_constants.PREDICT_METHOD_NAME:
            tf.estimator.export.PredictOutput(detections)
    }

  eval_metric_ops = None
  scaffold = None
  if mode == tf.estimator.ModeKeys.EVAL:
    class_agnostic = (
        fields.DetectionResultFields.detection_classes not in detections)
    groundtruth = _prepare_groundtruth_for_eval(
        detection_model, class_agnostic)
    use_original_images = fields.InputDataFields.original_image in features
    eval_images = (
        features[fields.InputDataFields.original_image]
        if use_original_images else features[fields.InputDataFields.image])
    eval_dict = eval_util.result_dict_for_single_example(
        eval_images[0:1],
        features[inputs.HASH_KEY][0],
        detections,
        groundtruth,
        class_agnostic=class_agnostic,
        scale_to_absolute=True)

    if class_agnostic:
      category_index = label_map_util.create_class_agnostic_category_index()
    else:
      category_index = label_map_util.create_category_index_from_labelmap(
          eval_input_config.label_map_path)

    img_summary = None
    if not use_tpu and use_original_images:
      detection_and_groundtruth = (
          vis_utils.draw_side_by_side_evaluation_image(
              eval_dict, category_index, max_boxes_to_draw=20,
              min_score_thresh=0.2, use_normalized_coordinates=False))
      img_summary = tf.summary.image('Detections_Left_Groundtruth_Right',
                                     detection_and_groundtruth)

    # Eval metrics on a single example.
    eval_metrics = eval_config.metrics_set
    if not eval_metrics:
      eval_metrics = ['coco_detection_metrics']
    eval_metric_ops = eval_util.get_eval_metric_ops_for_evaluators(
        eval_metrics,
        list(category_index.values()),
        eval_dict,
        include_metrics_per_category=(
            eval_config.include_metrics_per_category))
    for loss_key, loss_tensor in iter(losses_dict.items()):
      eval_metric_ops[loss_key] = tf.metrics.mean(loss_tensor)
    for var in optimizer_summary_vars:
      eval_metric_ops[var.op.name] = (var, tf.no_op())
    if img_summary is not None:
      eval_metric_ops['Detections_Left_Groundtruth_Right'] = (
          img_summary, tf.no_op())
    # Python 3: dict has no iteritems(); use items().
    eval_metric_ops = {str(k): v for k, v in eval_metric_ops.items()}

    if eval_config.use_moving_averages:
      variable_averages = tf.train.ExponentialMovingAverage(0.0)
      variables_to_restore = variable_averages.variables_to_restore()
      keep_checkpoint_every_n_hours = (
          train_config.keep_checkpoint_every_n_hours)
      saver = tf.train.Saver(
          variables_to_restore,
          keep_checkpoint_every_n_hours=keep_checkpoint_every_n_hours)
      scaffold = tf.train.Scaffold(saver=saver)

  if use_tpu:
    return tf.contrib.tpu.TPUEstimatorSpec(
        mode=mode,
        scaffold_fn=scaffold_fn,
        predictions=detections,
        loss=total_loss,
        train_op=train_op,
        eval_metrics=eval_metric_ops,
        export_outputs=export_outputs)
  else:
    return tf.estimator.EstimatorSpec(
        mode=mode,
        predictions=detections,
        loss=total_loss,
        train_op=train_op,
        eval_metric_ops=eval_metric_ops,
        export_outputs=export_outputs,
        scaffold=scaffold)
    if save_path:
        out.release()
    cv2.destroyAllWindows()


if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description='Detect objects inside webcam videostream')
    parser.add_argument('-m', '--model', type=str, required=True,
                        help='Model Path')
    parser.add_argument('-l', '--labelmap', type=str, required=True,
                        help='Path to Labelmap')
    parser.add_argument('-v', '--video_path', type=str, default='',
                        help='Path to video. If None camera will be used')
    parser.add_argument('-t', '--threshold', type=float, default=0.5,
                        help='Detection threshold')
    parser.add_argument('-roi', '--roi_position', type=float, default=0.6,
                        help='ROI Position (0-1)')
    parser.add_argument('-la', '--labels', nargs='+', type=str,
                        help='Label names to detect (default="all-labels")')
    parser.add_argument('-a', '--axis', default=True, action='store_false',
                        help='Axis for cumulative counting (default=x axis)')
    parser.add_argument('-s', '--skip_frames', type=int, default=20,
                        help='Number of frames to skip between using object '
                             'detection model')
    parser.add_argument('-sh', '--show', default=True, action='store_false',
                        help='Show output')
    parser.add_argument('-sp', '--save_path', type=str, default='',
                        help="Path to save the output. If None output won't "
                             "be saved")
    args = parser.parse_args()

    detection_model = load_model(args.model)
    category_index = label_map_util.create_category_index_from_labelmap(
        args.labelmap, use_display_name=True)

    if args.video_path != '':
        cap = cv2.VideoCapture(args.video_path)
    else:
        cap = cv2.VideoCapture(0)

    if not cap.isOpened():
        print("Error opening video stream or file")

    run_inference(detection_model, category_index, cap,
                  labels=args.labels,
                  threshold=args.threshold,
                  roi_position=args.roi_position,
                  x_axis=args.axis,
                  skip_frames=args.skip_frames,
                  save_path=args.save_path,
                  show=args.show)
def ck_postprocess(i):

    def evaluate(processed_image_ids, categories_list):
        # Convert annotations from the original format of the dataset to the
        # format expected by the tool that will calculate metrics.
        if DATASET_TYPE != METRIC_TYPE:
            print('\nConvert annotations from {} to {} ...'.format(
                DATASET_TYPE, METRIC_TYPE))
            annotations = converter_annotations.convert(
                ANNOTATIONS_PATH, ANNOTATIONS_OUT_DIR,
                DATASET_TYPE, METRIC_TYPE)
        else:
            annotations = ANNOTATIONS_PATH

        # Convert detection results from our universal text format to the
        # format expected by the tool that will calculate metrics.
        print('\nConvert results to {} ...'.format(METRIC_TYPE))
        results = converter_results.convert(
            DETECTIONS_OUT_DIR, RESULTS_OUT_DIR,
            DATASET_TYPE, MODEL_DATASET_TYPE, METRIC_TYPE)

        # Run evaluation tool.
        print('\nEvaluate metrics as {} ...'.format(METRIC_TYPE))
        if METRIC_TYPE == ck_utils.COCO:
            mAP, recall, all_metrics = (
                calc_metrics_coco.evaluate_via_pycocotools(
                    processed_image_ids, results, annotations))
        elif METRIC_TYPE == ck_utils.COCO_TF:
            mAP, recall, all_metrics = calc_metrics_coco.evaluate_via_tf(
                categories_list, results, annotations, FULL_REPORT)
        elif METRIC_TYPE == ck_utils.OID:
            mAP, _, all_metrics = calc_metrics_oid.evaluate(
                results, annotations, LABELMAP_FILE, FULL_REPORT)
            recall = 'N/A'
        else:
            raise ValueError(
                'Metrics type is not supported: {}'.format(METRIC_TYPE))

        OPENME['mAP'] = mAP
        OPENME['recall'] = recall
        OPENME['metrics'] = all_metrics
        return

    OPENME = {}

    with open(IMAGE_LIST_FILE, 'r') as f:
        processed_image_ids = json.load(f)

    if os.path.isfile(TIMER_JSON):
        with open(TIMER_JSON, 'r') as f:
            OPENME = json.load(f)

    # Run evaluation.
    ck_utils.print_header('Process results')
    category_index = label_map_util.create_category_index_from_labelmap(
        LABELMAP_FILE, use_display_name=True)
    # Materialize the dict view as a list (Python 3) so it can be consumed
    # more than once.
    categories_list = list(category_index.values())
    evaluate(processed_image_ids, categories_list)

    # Store benchmark results.
    with open(TIMER_JSON, 'w') as o:
        json.dump(OPENME, o, indent=2, sort_keys=True)

    # Print metrics.
    print('\nSummary:')
    print('-------------------------------')
    print('Graph loaded in {:.6f}s'.format(
        OPENME.get('graph_load_time_s', 0)))
    print('All images loaded in {:.6f}s'.format(
        OPENME.get('images_load_time_total_s', 0)))
    print('All images detected in {:.6f}s'.format(
        OPENME.get('detection_time_total_s', 0)))
    print('Average detection time: {:.6f}s'.format(
        OPENME.get('detection_time_avg_s', 0)))
    print('mAP: {}'.format(OPENME['mAP']))
    print('Recall: {}'.format(OPENME['recall']))
    print('-------------------------------\n')

    return {'return': 0}
PATH_TO_SAVED_MODEL = PATH_TO_MODEL_DIR + "/saved_model"

print('Loading model...', end='')
start_time = time.time()

# Load saved model and build the detection function
detect_fn = tf.saved_model.load(PATH_TO_SAVED_MODEL)

end_time = time.time()
elapsed_time = end_time - start_time
print('Done! Took {} seconds'.format(elapsed_time))

# Load label map data (for plotting)
category_index = label_map_util.create_category_index_from_labelmap(
    PATH_TO_LABELS, use_display_name=True)

import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
import warnings

warnings.filterwarnings('ignore')  # Suppress Matplotlib warnings


def load_image_into_numpy_array(path):
    """Load an image from file into a numpy array.

    Puts image into numpy array to feed into tensorflow graph. Note that by
    convention we put it into a numpy array with shape (height, width,
    channels), where channels=3 for RGB.

    Args:
        path: the file path to the image

    Returns:
        uint8 numpy array with shape (img_height, img_width, 3)
    """
    return np.array(Image.open(path))
def get_image():
    if request.method == 'POST':
        # Unpack request.
        image = request.files["images"]
        IMAGE_REQUEST = image.filename
        image.save(os.path.join(os.getcwd(), 'detections', 'tmp',
                                IMAGE_REQUEST))
        IMAGE_PATH = os.path.join(os.getcwd(), 'detections', 'tmp',
                                  IMAGE_REQUEST)
        place = dict(request.form)["place"]
        category_index = label_map_util.create_category_index_from_labelmap(
            './Tensorflow/annotations/label_map.pbtxt')

        # Detect the license plate object.
        command = "python detect.py --images ./detections/tmp/{} ".format(
            IMAGE_REQUEST)
        os.system(command)
        IMAGE_CROPPED = os.path.join(os.getcwd(), 'detections', 'crop',
                                     'license_plate_1.png')

        # Detect the digits on the license plate.
        img = cv2.imread(IMAGE_CROPPED)
        image_np = np.array(img)

        input_tensor = tf.convert_to_tensor(
            np.expand_dims(image_np, 0), dtype=tf.float32)
        detections = dtf.detect_fn(input_tensor)

        num_detections = int(detections.pop('num_detections'))
        detections = {key: value[0, :num_detections].numpy()
                      for key, value in detections.items()}
        detections['num_detections'] = num_detections

        # detection_classes should be ints.
        detections['detection_classes'] = (
            detections['detection_classes'].astype(np.int64))

        label_id_offset = 1
        image_np_with_detections = image_np.copy()

        boxes = detections['detection_boxes']
        max_boxes_to_draw = 8
        scores = detections['detection_scores']
        class_idx = detections['detection_classes'] + label_id_offset
        min_score_thresh = 0.5

        result = []
        for i in range(min(max_boxes_to_draw, boxes.shape[0])):
            if scores[i] > min_score_thresh:
                item = {"label": category_index[class_idx[i]]["name"],
                        "score": scores[i],
                        "boxes": boxes[i][1]}
                result.append(item)

        # Sort detected digits left to right by their xmin coordinate.
        result = sorted(result, key=lambda k: k['boxes'])
        digit_plate = ""
        for digit in result:
            digit_plate = digit_plate + digit["label"]
        print("digits detected: ", digit_plate)

        # Filter query to database.
        vehicle = db.session.query(Vehicle).filter_by(
            plate_number=digit_plate).scalar()
        transaction = db.session.query(Transaction).filter_by(
            plate_number=digit_plate, place=place, isDone=False).scalar()

        # If a user with plate_number <digit_plate> is parked at <place>,
        # they want to leave the parking lot.
        if (vehicle is not None) and (transaction is not None):
            try:
                # Add time_out.
                transaction.time_out = datetime.datetime.now().time()
                db.session.commit()

                # Add parking fee.
                time_enter = datetime.datetime.combine(
                    datetime.date.today(), transaction.time_enter)
                time_out = datetime.datetime.combine(
                    datetime.date.today(), transaction.time_out)
                time_diff = (time_out - time_enter).total_seconds()
                time_diff_in_hours = time_diff / 3600
                if time_diff_in_hours < 1:
                    transaction.price = LOW_PRICE
                else:
                    transaction.price = ceil(time_diff_in_hours) * HIGH_PRICE
                db.session.commit()

                time_enter = transaction.time_enter
                time_out = transaction.time_out
                price = transaction.price
                hours = floor(time_diff / 3600)
                minutes = floor((time_diff % 3600) / 60)
                seconds = floor(time_diff % 60)

                # Query select device_token.
                device_token = db.session.query(
                    Device.device_token).filter_by(
                        id_user=vehicle.id_user).scalar()

                # Send push notification to the Android app.
                notif_data = json.dumps({
                    "to": "{}".format(device_token),
                    "data": {
                        "body": "Please pay the parking fare!",
                        "title": "You are going out",
                        "timein": time_enter.strftime("%H:%M:%S"),
                        "timeout": time_out.strftime("%H:%M:%S"),
                        "totaltime": "{}h {}m {}s".format(
                            hours, minutes, seconds),
                        "fare": "{}".format(price),
                        "location": place
                    },
                    "notification": {
                        "body": "Please pay the parking fare!",
                        "title": "You are going out",
                        "click_action":
                            "com.dicoding.nextparking.ui.payment.PaymentActivity"
                    }
                })
                send_notification(notif_data)

                data = {
                    "response": "update transaction succeeded",
                    "id_user": transaction.id_user,
                    "id_transaction": transaction.id_transaction,
                    "plate_number": transaction.plate_number,
                    "place": transaction.place,
                    "time_enter": time_enter.strftime("%H:%M:%S"),
                    "time_out": time_out.strftime("%H:%M:%S"),
                    "price": str(transaction.price)
                }
                print("update transaction succeeded")
                return render_template('parking.html', data=data)
            except:
                data = {
                    "response": "update transaction failed",
                    "id_user": transaction.id_user,
                    "id_transaction": transaction.id_transaction,
                    "plate_number": transaction.plate_number,
                    "place": transaction.place,
                    "time_enter": time_enter.strftime("%H:%M:%S"),
                }
                print("update transaction failed")
                return render_template('parking.html', data=data)

        # If a user with plate_number <digit_plate> exists and wants to park
        # at <place>.
        elif vehicle is not None:
            try:
                # Add a new transaction.
                print(datetime.datetime.now().time())
                new_transaction = Transaction(
                    id_user=vehicle.id_user,
                    plate_number=vehicle.plate_number,
                    place=place,
                    time_enter=datetime.datetime.now().time())
                db.session.add(new_transaction)
                db.session.commit()
                time_enter = new_transaction.time_enter

                # Query select device_token.
                device_token = db.session.query(
                    Device.device_token).filter_by(
                        id_user=vehicle.id_user).scalar()

                # Send push notification to the Android app.
                notif_data = json.dumps({
                    "to": "{}".format(device_token),
                    "data": {
                        "body": "You are entering {} parking lot!".format(
                            place),
                        "title": "You are going in",
                        "timein": time_enter.strftime("%H:%M:%S"),
                        "location": place
                    },
                    "notification": {
                        "body": "You are entering {} parking lot!".format(
                            place),
                        "title": "You are going in",
                        "click_action":
                            "com.dicoding.nextparking.HomeActivity"
                    }
                })
                send_notification(notif_data)

                data = {
                    "response": "add new transaction record succeeded",
                    "id_user": new_transaction.id_user,
                    "id_transaction": new_transaction.id_transaction,
                    "plate_number": new_transaction.plate_number,
                    "place": new_transaction.place,
                    "time_enter": time_enter.strftime("%H:%M:%S"),
                    "time_out": str(new_transaction.time_out),
                    "price": str(new_transaction.price)
                }
                print("add new transaction record succeeded")
                return render_template('parking.html', data=data)
            except:
                data = {
                    "response": "add new transaction record failed",
                    "id_user": vehicle.id_user,
                    "plate_number": digit_plate
                }
                print("add new transaction record failed")
                return render_template('parking.html', data=data)

        # If user not found.
        else:
            data = {
                "response": "user not found",
                "plate_number": digit_plate
            }
            print("user not found")
            return render_template('parking.html', data=data)
    else:
        return render_template('index.html')
def mask_inference_webcam_2(self, camera_input, camera_width, camera_height):
    cap, out_file = set_input_camera(camera_input, camera_width,
                                     camera_height,
                                     self.__images_name_prefix + ".mp4")
    counter = 1
    while True:
        # Read frame from camera.
        ret, image_np = cap.read()

        # Expand the image to shape [1, None, None, 3] and convert it to a
        # tensor, since the saved model expects a batch dimension.
        input_tensor = tf.convert_to_tensor(
            np.expand_dims(image_np, 0), dtype=tf.uint8)
        detections = self.__model(input_tensor)

        image_np_with_detections = image_np.copy()

        try:
            from object_detection.utils import ops as utils_ops
            from object_detection.utils import label_map_util
            from object_detection.utils import visualization_utils as viz_utils

            category_index = (
                label_map_util.create_category_index_from_labelmap(
                    self.__path_to_labels, use_display_name=True))
            label_id_offset = 0

            if 'detection_masks' in detections:
                detection_masks = tf.convert_to_tensor(
                    detections['detection_masks'][0])
                detection_boxes = tf.convert_to_tensor(
                    detections['detection_boxes'][0])
                # Reframe the bbox masks to the image size.
                detection_masks_reframed = (
                    utils_ops.reframe_box_masks_to_image_masks(
                        detection_masks, detection_boxes,
                        image_np.shape[0], image_np.shape[1]))
                detection_masks_reframed = tf.cast(
                    detection_masks_reframed > 0.5, tf.uint8)
                detections['detection_masks_reframed'] = (
                    detection_masks_reframed.numpy())

            viz_utils.visualize_boxes_and_labels_on_image_array(
                image_np_with_detections,
                detections['detection_boxes'][0].numpy(),
                (detections['detection_classes'][0].numpy() +
                 label_id_offset).astype(int),
                detections['detection_scores'][0].numpy(),
                category_index,
                instance_masks=detections.get('detection_masks_reframed',
                                              None),
                use_normalized_coordinates=True,
                max_boxes_to_draw=100,
                min_score_thresh=self.__threshold,
                agnostic_mode=False,
                line_thickness=2)
        except ImportError:
            click.echo(
                click.style(
                    "\nThe mask will not be applied: the Object Detection "
                    "API is not available.\n",
                    bold=True, fg='red'))

        classIds = detections['detection_classes'][0].numpy()
        scores = detections['detection_scores'][0].numpy()
        boxes = detections['detection_boxes'][0].numpy()
        masks = detections['detection_masks'][0].numpy()

        # Visualize detected bounding boxes.
        for i in range(boxes.shape[0]):
            if scores[i] > self.__threshold:
                score = scores[i]
                bbox = boxes[i]
                instance_mask = masks[i]
                classId = classIds[i]
                img = self.visualize_bbox_mask_pil(
                    image_np_with_detections, score, bbox, instance_mask,
                    classId)
                img_path = os.path.join(
                    PATH_DIR_IMAGE_INF,
                    self.__images_name_prefix + "_savedmodel_.png")

        out_file.write(image_np_with_detections)
        cv.imshow(self.__images_name_prefix, image_np_with_detections)
        if cv.waitKey(25) & 0xFF == ord('q'):
            break

    cap.release()
    cv.destroyAllWindows()
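# `set_input_camera` is called above but defined elsewhere. A plausible
# sketch, assuming it opens the capture device (index or video path), applies
# the requested resolution, and prepares a VideoWriter for the output file;
# the codec and frame rate are assumptions:
import cv2 as cv

def set_input_camera(camera_input, camera_width, camera_height, out_name):
    cap = cv.VideoCapture(camera_input)
    cap.set(cv.CAP_PROP_FRAME_WIDTH, camera_width)
    cap.set(cv.CAP_PROP_FRAME_HEIGHT, camera_height)
    fourcc = cv.VideoWriter_fourcc(*'mp4v')
    out_file = cv.VideoWriter(out_name, fourcc, 20.0,
                              (int(camera_width), int(camera_height)))
    return cap, out_file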
def vehicle_detector():
    '''
    Detect vehicles on a video stream using the TF Object Detection API.
    1- Download an object detection pre-trained model from the TF Model Zoo
    2- Pass the video stream to the TF API and get object detection scores
    3- Visualize boxes with detected objects
    4- Define the approximate distance to objects in the user's trajectory (not on the sides)
    5- Return the distance to objects when a possible collision is detected
    '''
    # %%
    # Create the data directory
    # ~~~~~~~~~~~~~~~~~~~~~~~~~
    # The snippet shown below will create the ``data`` directory where all our data will be stored. The
    # code will create a directory structure as shown below:
    #
    # .. code-block:: bash
    #
    #    data
    #    └── models
    #
    # where the ``models`` folder will contain the downloaded models.
    DATA_DIR = os.path.join(os.getcwd(), 'config/collect_data/vehicle_detector_data')
    MODELS_DIR = os.path.join(DATA_DIR, 'models')
    for directory in [DATA_DIR, MODELS_DIR]:
        if not os.path.exists(directory):
            os.mkdir(directory)

    # %%
    # Download the model
    # ~~~~~~~~~~~~~~~~~~
    # The code snippet shown below is used to download the object detection model checkpoint file,
    # as well as the labels file (.pbtxt) which contains a list of strings used to add the correct
    # label to each detection (e.g. person).
    #
    # The particular detection algorithm we will use is the `SSD MobileNet V2 320x320`. More
    # models can be found in the `TensorFlow 2 Detection Model Zoo <https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/tf2_detection_zoo.md>`_.
    # To use a different model you will need the URL name of the specific model. This can be done as
    # follows:
    #
    # 1. Right click on the `Model name` of the model you would like to use;
    # 2. Click on `Copy link address` to copy the download link of the model;
    # 3. Paste the link in a text editor of your choice. You should observe a link similar to ``download.tensorflow.org/models/object_detection/tf2/YYYYYYYY/XXXXXXXXX.tar.gz``;
    # 4. Copy the ``XXXXXXXXX`` part of the link and use it to replace the value of the ``MODEL_NAME`` variable in the code shown below;
    # 5. Copy the ``YYYYYYYY`` part of the link and use it to replace the value of the ``MODEL_DATE`` variable in the code shown below.
    #
    # For example, the download link for the ``ssd_resnet101_v1_fpn_640x640`` model is: ``download.tensorflow.org/models/object_detection/tf2/20200711/ssd_resnet101_v1_fpn_640x640_coco17_tpu-8.tar.gz``

    # Download and extract model
    MODEL_DATE = '20200711'
    MODEL_NAME = "ssd_mobilenet_v2_320x320_coco17_tpu-8"
    MODEL_TAR_FILENAME = MODEL_NAME + '.tar.gz'
    MODELS_DOWNLOAD_BASE = 'http://download.tensorflow.org/models/object_detection/tf2/'
    MODEL_DOWNLOAD_LINK = MODELS_DOWNLOAD_BASE + MODEL_DATE + '/' + MODEL_TAR_FILENAME
    PATH_TO_MODEL_TAR = os.path.join(MODELS_DIR, MODEL_TAR_FILENAME)
    PATH_TO_CKPT = os.path.join(MODELS_DIR, os.path.join(MODEL_NAME, 'checkpoint/'))
    PATH_TO_CFG = os.path.join(MODELS_DIR, os.path.join(MODEL_NAME, 'pipeline.config'))
    if not os.path.exists(PATH_TO_CKPT):
        print('Downloading model. This may take a while... ', end='')
        urllib.request.urlretrieve(MODEL_DOWNLOAD_LINK, PATH_TO_MODEL_TAR)
        tar_file = tarfile.open(PATH_TO_MODEL_TAR)
        tar_file.extractall(MODELS_DIR)
        tar_file.close()
        os.remove(PATH_TO_MODEL_TAR)
        print('Done')

    # Download labels file
    LABEL_FILENAME = 'mscoco_label_map.pbtxt'
    LABELS_DOWNLOAD_BASE = \
        'https://raw.githubusercontent.com/tensorflow/models/master/research/object_detection/data/'
    PATH_TO_LABELS = os.path.join(MODELS_DIR, os.path.join(MODEL_NAME, LABEL_FILENAME))
    if not os.path.exists(PATH_TO_LABELS):
        print('Downloading label file... ', end='')
        urllib.request.urlretrieve(LABELS_DOWNLOAD_BASE + LABEL_FILENAME, PATH_TO_LABELS)
        print('Done')

    # %%
    # Load the model
    # ~~~~~~~~~~~~~~
    # Enable GPU dynamic memory allocation (guarded, so CPU-only machines do not crash)
    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    if physical_devices:
        tf.config.experimental.set_memory_growth(physical_devices[0], True)

    # Load pipeline config and build a detection model
    configs = config_util.get_configs_from_pipeline_file(PATH_TO_CFG)
    model_config = configs['model']
    detection_model = model_builder.build(model_config=model_config, is_training=False)

    # Restore checkpoint
    ckpt = tf.compat.v2.train.Checkpoint(model=detection_model)
    ckpt.restore(os.path.join(PATH_TO_CKPT, 'ckpt-0')).expect_partial()

    # @tf.function
    def detect_fn(image):
        """Detect objects in image."""
        image, shapes = detection_model.preprocess(image)
        prediction_dict = detection_model.predict(image, shapes)
        detections = detection_model.postprocess(prediction_dict, shapes)
        return detections, prediction_dict, tf.reshape(shapes, [-1])

    # %%
    # Load label map data (for plotting)
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Label maps map index numbers to category names, so that when our convolution network
    # predicts `5`, we know that this corresponds to `airplane`. Here we use internal utility
    # functions, but anything that returns a dictionary mapping integers to appropriate string labels
    # would be fine.
    category_index = label_map_util.create_category_index_from_labelmap(
        PATH_TO_LABELS, use_display_name=True)

    # %%
    # Define the video stream
    # ~~~~~~~~~~~~~~~~~~~~~~~
    # We will use `OpenCV <https://pypi.org/project/opencv-python/>`_ to capture the video stream.
    # For more information you can refer to the `OpenCV-Python Tutorials <https://opencv-python-tutroals.readthedocs.io/en/latest/py_tutorials/py_gui/py_video_display/py_video_display.html#capture-video-from-camera>`_

    # %%
    # Putting everything together
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # The code shown below grabs a frame from the screen, runs it through the detection model
    # and inspects the detection results.
    #
    # Note that this will take a long time (several minutes) the first time you run this code due to
    # tf.function's trace-compilation --- on subsequent runs (e.g. on new images), things will be
    # faster.
    #
    # Here are some simple things to try out if you are curious:
    #
    # * Modify some of the input images and see if detection still works. Some simple things to try out here (just uncomment the relevant portions of code) include flipping the image horizontally, or converting to grayscale (note that we still expect the input image to have 3 channels).
    # * Print out `detections['detection_boxes']` and try to match the box locations to the boxes in the image. Notice that coordinates are given in normalized form (i.e., in the interval [0, 1]).
    # * Set ``min_score_thresh`` to other values (between 0 and 1) to allow more detections in or to filter out more detections.

    # while True:
    # Read frame from screen
    # screen = cv2.resize(grab_screen(region=(40,40,800,450)), (320,320))
    screen = grab_screen(region=(40, 100, 800, 450))
    image_np = cv2.cvtColor(screen, cv2.COLOR_BGR2RGB)

    # Expand dimensions since the model expects images to have shape: [1, None, None, 3]
    input_tensor = tf.convert_to_tensor(np.expand_dims(image_np, 0), dtype=tf.float32)

    last_time = time.time()
    detections, predictions_dict, shapes = detect_fn(input_tensor)
    print('Inference time: {}'.format(time.time() - last_time))

    # label_id_offset = 1
    image_np_with_detections = image_np.copy()

    # viz_utils.visualize_boxes_and_labels_on_image_array(
    #     image_np_with_detections,
    #     detections['detection_boxes'][0].numpy(),
    #     (detections['detection_classes'][0].numpy() + label_id_offset).astype(int),
    #     detections['detection_scores'][0].numpy(),
    #     category_index,
    #     use_normalized_coordinates=True,
    #     max_boxes_to_draw=20,
    #     min_score_thresh=.50,
    #     agnostic_mode=False)

    # Define collision & apx_distance default values
    collision = False
    apx_distance = 1.0
    for i, b in enumerate(detections['detection_boxes'][0]):
        # 0-based class indices into the COCO label map: 2 = car, 5 = bus, 7 = truck
        if detections['detection_classes'][0][i] in (2, 5, 7):
            if detections['detection_scores'][0][i] >= 0.5:
                mid_x = (detections['detection_boxes'][0][i][1] +
                         detections['detection_boxes'][0][i][3]) / 2
                mid_y = (detections['detection_boxes'][0][i][0] +
                         detections['detection_boxes'][0][i][2]) / 2
                aspect_ratio = (detections['detection_boxes'][0][i][3] -
                                detections['detection_boxes'][0][i][1]) / (
                                    detections['detection_boxes'][0][i][2] -
                                    detections['detection_boxes'][0][i][0])
                apx_distance = np.round(
                    ((1 - (detections['detection_boxes'][0][i][3] -
                           detections['detection_boxes'][0][i][1]))**4), 2)
                # cv2.putText(image_np_with_detections, '{}'.format(str(apx_distance)), (int(mid_x*800),int(mid_y*450)), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255,255,255), 2)

                # Possible collision: close, roughly ahead, and not a wide side-on box
                if apx_distance <= 0.5 and 0.3 < mid_x < 0.7 and aspect_ratio < 2:
                    collision = True
                    return np.array([collision, apx_distance])

    # cv2.putText(image_np_with_detections, 'WARNING!!!', (50,50), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0,0,255), 3)
    # # Display output
    # cv2.imshow('object detection', cv2.resize(image_np_with_detections, (800,450)))
    #
    # if cv2.waitKey(25) & 0xFF == ord('q'):
    #     break
    #
    # screen.release()
    # cv2.destroyAllWindows()

    return np.array([collision, apx_distance])
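# The collision logic above approximates distance from the normalized box
# width. A standalone sketch to make the heuristic explicit; the function name
# is illustrative, not from the original code.
import numpy as np

def approximate_distance(box):
    """Rough distance proxy from a normalized box [ymin, xmin, ymax, xmax].

    The wider the box, the closer the vehicle; (1 - width)**4 maps a width in
    [0, 1] to a value that drops quickly as the box grows. It is a unitless
    heuristic, not a calibrated distance.
    """
    width = box[3] - box[1]
    return np.round((1.0 - width) ** 4, 2)

# e.g. a box spanning half the frame width:
print(approximate_distance([0.4, 0.25, 0.6, 0.75]))  # 0.06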
def __init__(self, model_path, label_path): self.detect_fn = tf.saved_model.load(model_path) self.category_index = label_map_util.create_category_index_from_labelmap( label_path, use_display_name=True) self.name = os.path.basename(os.path.dirname(model_path))
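# A hedged sketch of how a wrapper like the one above might be completed and
# used; the class name Detector and the detect() method are assumptions, not
# shown in the original snippet.
import os

import tensorflow as tf
from object_detection.utils import label_map_util


class Detector:
    def __init__(self, model_path, label_path):
        self.detect_fn = tf.saved_model.load(model_path)
        self.category_index = label_map_util.create_category_index_from_labelmap(
            label_path, use_display_name=True)
        self.name = os.path.basename(os.path.dirname(model_path))

    def detect(self, image_np):
        # image_np: HxWx3 uint8 array; Object Detection API SavedModels take a
        # batched tensor and return batched outputs.
        input_tensor = tf.convert_to_tensor(image_np)[tf.newaxis, ...]
        detections = self.detect_fn(input_tensor)
        num = int(detections.pop('num_detections'))
        return {key: value[0, :num].numpy() for key, value in detections.items()}


# Hypothetical usage:
#   detector = Detector('exported/saved_model', 'label_map.pbtxt')
#   result = detector.detect(np.array(Image.open('test.jpg')))
#   print(result['detection_scores'][:5])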
def detect_image(img_paths):
    img_paths.sort()

    # load label map
    category_index = label_map_util.create_category_index_from_labelmap(
        PATH_TO_LABELS)

    # load detection graph
    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(PATH_TO_FROZEN_GRAPH, 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name='')

    # define input/output tensors
    image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
    detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
    detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')
    detection_classes = detection_graph.get_tensor_by_name('detection_classes:0')
    num_detections = detection_graph.get_tensor_by_name('num_detections:0')

    # load input imgs
    img_batches = list(split_into_batches(img_paths, batch_size))
    boxes_df = pd.DataFrame(columns=[
        'img_frame', 'detection', 'ymin', 'xmin', 'ymax', 'xmax', 'score'
    ])
    boxes_df = boxes_df.set_index(['img_frame', 'detection'])
    ydim, xdim, _ = cv2.imread(img_paths[0]).shape
    pad_len = int(ydim * pad_pcnt) + 1
    for batch in img_batches:
        imgs = [cv2.imread(image_path) for image_path in batch]
        imgs = [img[..., ::-1] for img in imgs]  # BGR to RGB
        imgs = [
            np.pad(img, [(0, pad_len), (0, 0), (0, 0)], 'constant')
            for img in imgs
        ]
        try:
            img_concat = np.concatenate(imgs)
        except ValueError:
            # Mismatched image shapes in this batch; drop into the debugger.
            import pdb
            pdb.set_trace()
            print('oh noes')

        # run inference
        with detection_graph.as_default():
            with tf.Session() as sess:
                all_boxes, all_scores, all_classes, _ = sess.run(
                    [
                        detection_boxes, detection_scores, detection_classes,
                        num_detections
                    ],
                    feed_dict={image_tensor: np.expand_dims(img_concat, 0)})

        # boxes format: [ymin, xmin, ymax, xmax], as fractions of image size
        s_all_boxes = np.squeeze(all_boxes)
        s_all_scores = np.squeeze(all_scores)
        all_ydim = img_concat.shape[0]
        all_xdim = img_concat.shape[1]
        s_all_boxes = [
            np.array([
                int(round(box[0] * all_ydim)),
                int(round(box[1] * all_xdim)),
                int(round(box[2] * all_ydim)),
                int(round(box[3] * all_xdim)), s_all_scores[ind]
            ]) for ind, box in enumerate(s_all_boxes)
        ]
        best_all_boxes = [
            box for ind, box in enumerate(s_all_boxes) if box[4] > thresh
        ]

        # for box in np.squeeze(boxes):
        for img_ind, image_path in enumerate(batch):
            image_path_stem = str(Path(image_path).stem)
            # convert s_all_boxes and boxes for this image!
            best_boxes = [
                adjust_box_locs(box, ydim, img_ind, pad_len)
                for box in best_all_boxes
                if ((img_ind) * (ydim + pad_len) <=
                    (box[0] + box[2]) / 2 < (img_ind + 1) * (ydim + pad_len))
            ]
            # NOTE: this doesn't capture the edge case of a fish being in, say,
            # image 1 while its bbox extends through pad_len into image 2
            # below... maybe just make pad_len bigger??
            # import pdb; pdb.set_trace()
            if len(best_boxes) == 0:
                boxes_df.loc[(image_path_stem, 0), :] = [
                    None, None, None, None, None
                ]
            for ind, box in enumerate(best_boxes):
                boxes_df.loc[(image_path_stem, ind + 1), :] = box

    # output_dir.stem is the video name
    fn = str(Path(output_dir, Path(output_dir).stem)) + '_detections_output.csv'
    boxes_df.to_csv(fn)
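# split_into_batches is called above but not defined in this snippet; one
# plausible implementation (a sketch, names assumed):
def split_into_batches(items, batch_size):
    """Yield successive batch_size-sized chunks from a list."""
    for i in range(0, len(items), batch_size):
        yield items[i:i + batch_size]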
def start_stream(args):
    PATH_TO_MODEL_DIR = args.model
    PATH_TO_LABELS = args.labels
    MIN_CONF_THRESH = float(args.threshold)
    PATH_TO_SAVED_MODEL = PATH_TO_MODEL_DIR + "/saved_model"

    start_time = time.time()
    detect_fn = tf.saved_model.load(PATH_TO_SAVED_MODEL)
    end_time = time.time()
    elapsed_time = end_time - start_time
    print('Loading model took {} seconds'.format(elapsed_time))

    # Load label map data for plotting
    category_index = label_map_util.create_category_index_from_labelmap(
        PATH_TO_LABELS, use_display_name=True)
    warnings.filterwarnings('ignore')

    print('Create stream for PiCamera')
    videostream = VideoStream(resolution=(640, 480), framerate=30).start()
    while True:
        frame = videostream.read()
        imH, imW, _ = frame.shape
        # Note: OpenCV delivers BGR frames; the raw frame is fed to the model
        # here, matching the original code.
        input_tensor = tf.convert_to_tensor(frame)
        input_tensor = input_tensor[tf.newaxis, ...]
        detections = detect_fn(input_tensor)
        num_detections = int(detections.pop('num_detections'))
        detections = {
            key: value[0, :num_detections].numpy()
            for key, value in detections.items()
        }
        detections['num_detections'] = num_detections
        detections['detection_classes'] = detections['detection_classes'].astype(np.int64)

        scores = detections['detection_scores']
        boxes = detections['detection_boxes']
        classes = detections['detection_classes']
        count = 0
        for i in range(len(scores)):
            if (scores[i] > MIN_CONF_THRESH) and (scores[i] <= 1.0):
                count += 1
                # Get bounding box coordinates and draw the box. The model can
                # return coordinates outside of the image dimensions, so force
                # them to be within the image using max() and min().
                ymin = int(max(1, (boxes[i][0] * imH)))
                xmin = int(max(1, (boxes[i][1] * imW)))
                ymax = int(min(imH, (boxes[i][2] * imH)))
                xmax = int(min(imW, (boxes[i][3] * imW)))
                cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), (10, 255, 0), 2)
                # Draw label
                object_name = category_index[int(classes[i])]['name']
                label = '%s: %d%%' % (object_name, int(scores[i] * 100))
                labelSize, baseLine = cv2.getTextSize(
                    label, cv2.FONT_HERSHEY_SIMPLEX, 0.7, 2)  # Get font size
                label_ymin = max(ymin, labelSize[1] + 10)
                # Draw white box to put label text in
                cv2.rectangle(frame, (xmin, label_ymin - labelSize[1] - 10),
                              (xmin + labelSize[0], label_ymin + baseLine - 10),
                              (255, 255, 255), cv2.FILLED)
                # Draw label text
                cv2.putText(frame, label, (xmin, label_ymin - 7),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 0), 2)

        cv2.putText(frame, 'Objects Detected : ' + str(count), (10, 25),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (70, 235, 52), 2, cv2.LINE_AA)
        cv2.imshow('Object Detector', frame)
        if cv2.waitKey(1) == ord('q'):
            break
    cv2.destroyAllWindows()
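# VideoStream above follows the common threaded-capture pattern (as in the
# imutils package); a minimal sketch assuming that interface, not the exact
# class from the original project.
import threading

import cv2


class VideoStream:
    """Threaded camera reader with the start()/read()/stop() interface used above."""

    def __init__(self, resolution=(640, 480), framerate=30):
        self.stream = cv2.VideoCapture(0)
        self.stream.set(cv2.CAP_PROP_FRAME_WIDTH, resolution[0])
        self.stream.set(cv2.CAP_PROP_FRAME_HEIGHT, resolution[1])
        self.stream.set(cv2.CAP_PROP_FPS, framerate)
        self.grabbed, self.frame = self.stream.read()
        self.stopped = False

    def start(self):
        threading.Thread(target=self.update, daemon=True).start()
        return self

    def update(self):
        # Keep grabbing frames so read() always returns the latest one.
        while not self.stopped:
            self.grabbed, self.frame = self.stream.read()
        self.stream.release()

    def read(self):
        return self.frame

    def stop(self):
        self.stopped = True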
# Loading the saved_model (change the path according to your directory names)
import time

import numpy as np
import tensorflow as tf
from PIL import Image

from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as viz_utils

PATH_TO_SAVED_MODEL = "/content/drive/MyDrive/models/research/object_detection/test_directory/saved_model"

# Load saved model and build the detection function
print('Loading model...', end='')
detect_fn = tf.saved_model.load(PATH_TO_SAVED_MODEL)
print('Done!')

# Loading the label_map
category_index = label_map_util.create_category_index_from_labelmap(
    "/content/drive/MyDrive/models/research/object_detection/data/mscoco_label_map.pbtxt",
    use_display_name=True)
# category_index = label_map_util.create_category_index_from_labelmap([path_to_label_map], use_display_name=True)

# Loading the image
img = ['/content/drive/MyDrive/models/research/object_detection/test_images/20200302_143518.JPG']
print(img)  # list containing paths of all the images

def load_image_into_numpy_array(path):
    return np.array(Image.open(path))

for image_path in img:
    print('Running inference for {}... '.format(image_path), end='')
    image_np = load_image_into_numpy_array(image_path)
    input_tensor = tf.convert_to_tensor(image_np)
    input_tensor = input_tensor[tf.newaxis, ...]
configs = config_util.get_configs_from_pipeline_file('pipeline.config') detection_model = model_builder.build(model_config=configs['model'], is_training=False) # Restore checkpoint ckpt = tf.compat.v2.train.Checkpoint(model=detection_model) ckpt.restore(os.path.join( 'my_ssd_mobnet/', 'ckpt-17')).expect_partial() @tf.function def detect_fn(image): image, shapes = detection_model.preprocess(image) prediction_dict = detection_model.predict(image, shapes) detections = detection_model.postprocess(prediction_dict, shapes) return detections category_index = label_map_util.create_category_index_from_labelmap('label_map.pbtxt') cap = cv2.VideoCapture(0) width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) while True: ret, frame = cap.read() image_np = np.array(frame) input_tensor = tf.convert_to_tensor(np.expand_dims(image_np, 0), dtype=tf.float32) detections = detect_fn(input_tensor) num_detections = int(detections.pop('num_detections')) detections = {key: value[0, :num_detections].numpy() for key, value in detections.items()}
def eager_eval_loop( detection_model, configs, eval_dataset, use_tpu=False, postprocess_on_cpu=False, global_step=None, ): """Evaluate the model eagerly on the evaluation dataset. This method will compute the evaluation metrics specified in the configs on the entire evaluation dataset, then return the metrics. It will also log the metrics to TensorBoard. Args: detection_model: A DetectionModel (based on Keras) to evaluate. configs: Object detection configs that specify the evaluators that should be used, as well as whether regularization loss should be included and if bfloat16 should be used on TPUs. eval_dataset: Dataset containing evaluation data. use_tpu: Whether a TPU is being used to execute the model for evaluation. postprocess_on_cpu: Whether model postprocessing should happen on the CPU when using a TPU to execute the model. global_step: A variable containing the training step this model was trained to. Used for logging purposes. Returns: A dict of evaluation metrics representing the results of this evaluation. """ del postprocess_on_cpu train_config = configs['train_config'] eval_input_config = configs['eval_input_config'] eval_config = configs['eval_config'] add_regularization_loss = train_config.add_regularization_loss is_training = False detection_model._is_training = is_training # pylint: disable=protected-access tf.keras.backend.set_learning_phase(is_training) evaluator_options = eval_util.evaluator_options_from_eval_config( eval_config) batch_size = eval_config.batch_size class_agnostic_category_index = ( label_map_util.create_class_agnostic_category_index()) class_agnostic_evaluators = eval_util.get_evaluators( eval_config, list(class_agnostic_category_index.values()), evaluator_options) class_aware_evaluators = None if eval_input_config.label_map_path: class_aware_category_index = ( label_map_util.create_category_index_from_labelmap( eval_input_config.label_map_path)) class_aware_evaluators = eval_util.get_evaluators( eval_config, list(class_aware_category_index.values()), evaluator_options) evaluators = None loss_metrics = {} @tf.function def compute_eval_dict(features, labels): """Compute the evaluation result on an image.""" # For evaling on train data, it is necessary to check whether groundtruth # must be unpadded. 
boxes_shape = ( labels[fields.InputDataFields.groundtruth_boxes].get_shape().as_list()) unpad_groundtruth_tensors = (boxes_shape[1] is not None and not use_tpu and batch_size == 1) groundtruth_dict = labels labels = model_lib.unstack_batch( labels, unpad_groundtruth_tensors=unpad_groundtruth_tensors) losses_dict, prediction_dict = _compute_losses_and_predictions_dicts( detection_model, features, labels, add_regularization_loss) prediction_dict = detection_model.postprocess( prediction_dict, features[fields.InputDataFields.true_image_shape]) eval_features = { fields.InputDataFields.image: features[fields.InputDataFields.image], fields.InputDataFields.original_image: features[fields.InputDataFields.original_image], fields.InputDataFields.original_image_spatial_shape: features[fields.InputDataFields.original_image_spatial_shape], fields.InputDataFields.true_image_shape: features[fields.InputDataFields.true_image_shape], inputs.HASH_KEY: features[inputs.HASH_KEY], } return losses_dict, prediction_dict, groundtruth_dict, eval_features agnostic_categories = label_map_util.create_class_agnostic_category_index() per_class_categories = label_map_util.create_category_index_from_labelmap( eval_input_config.label_map_path) keypoint_edges = [ (kp.start, kp.end) for kp in eval_config.keypoint_edge] strategy = tf.compat.v2.distribute.get_strategy() for i, (features, labels) in enumerate(eval_dataset): try: (losses_dict, prediction_dict, groundtruth_dict, eval_features) = strategy.run( compute_eval_dict, args=(features, labels)) except Exception as exc: # pylint:disable=broad-except tf.logging.info('Encountered %s exception.', exc) tf.logging.info('A replica probably exhausted all examples. Skipping ' 'pending examples on other replicas.') break (local_prediction_dict, local_groundtruth_dict, local_eval_features) = tf.nest.map_structure( strategy.experimental_local_results, [prediction_dict, groundtruth_dict, eval_features]) local_prediction_dict = concat_replica_results(local_prediction_dict) local_groundtruth_dict = concat_replica_results(local_groundtruth_dict) local_eval_features = concat_replica_results(local_eval_features) eval_dict, class_agnostic = prepare_eval_dict(local_prediction_dict, local_groundtruth_dict, local_eval_features) for loss_key, loss_tensor in iter(losses_dict.items()): losses_dict[loss_key] = strategy.reduce(tf.distribute.ReduceOp.MEAN, loss_tensor, None) if class_agnostic: category_index = agnostic_categories else: category_index = per_class_categories if i % 100 == 0: tf.logging.info('Finished eval step %d', i) use_original_images = fields.InputDataFields.original_image in features if (use_original_images and i < eval_config.num_visualizations): sbys_image_list = vutils.draw_side_by_side_evaluation_image( eval_dict, category_index=category_index, max_boxes_to_draw=eval_config.max_num_boxes_to_visualize, min_score_thresh=eval_config.min_score_threshold, use_normalized_coordinates=False, keypoint_edges=keypoint_edges or None) for j, sbys_image in enumerate(sbys_image_list): tf.compat.v2.summary.image( name='eval_side_by_side_{}_{}'.format(i, j), step=global_step, data=sbys_image, max_outputs=eval_config.num_visualizations) if eval_util.has_densepose(eval_dict): dp_image_list = vutils.draw_densepose_visualizations( eval_dict) for j, dp_image in enumerate(dp_image_list): tf.compat.v2.summary.image( name='densepose_detections_{}_{}'.format(i, j), step=global_step, data=dp_image, max_outputs=eval_config.num_visualizations) if evaluators is None: if class_agnostic: evaluators = 
class_agnostic_evaluators else: evaluators = class_aware_evaluators for evaluator in evaluators: evaluator.add_eval_dict(eval_dict) for loss_key, loss_tensor in iter(losses_dict.items()): if loss_key not in loss_metrics: loss_metrics[loss_key] = [] loss_metrics[loss_key].append(loss_tensor) eval_metrics = {} for evaluator in evaluators: eval_metrics.update(evaluator.evaluate()) for loss_key in loss_metrics: eval_metrics[loss_key] = tf.reduce_mean(loss_metrics[loss_key]) eval_metrics = {str(k): v for k, v in eval_metrics.items()} tf.logging.info('Eval metrics at step %d', global_step.numpy()) for k in eval_metrics: tf.compat.v2.summary.scalar(k, eval_metrics[k], step=global_step) tf.logging.info('\t+ %s: %f', k, eval_metrics[k]) return eval_metrics
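# A hedged sketch of how eager_eval_loop is usually driven. The pipeline path
# is a placeholder, and a trained checkpoint would normally be restored into
# detection_model first; in the Object Detection API this wiring lives in
# model_lib_v2.eval_continuously.
import tensorflow.compat.v2 as tf
from object_detection import inputs
from object_detection.builders import model_builder
from object_detection.utils import config_util

configs = config_util.get_configs_from_pipeline_file('pipeline.config')
detection_model = model_builder.build(
    model_config=configs['model'], is_training=False)
eval_dataset = inputs.eval_input(
    eval_config=configs['eval_config'],
    eval_input_config=configs['eval_input_config'],
    model_config=configs['model'],
    model=detection_model)
global_step = tf.Variable(0, trainable=False, dtype=tf.int64)
metrics = eager_eval_loop(
    detection_model, configs, eval_dataset, global_step=global_step)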
def detect_in_video(input_video_path, output_video_path, detection_rate=5):
    # VideoWriter is responsible for creating a copy of the video used for the
    # detections, but with the detection overlays. Keep in mind the frame size
    # has to be the same as the original video.
    # out = cv2.VideoWriter('output_video_path', cv2.VideoWriter_fourcc('F', 'M', 'P', '4'), 10, (1280, 720))
    detection_graph = load_graph()
    category_index = label_map_util.create_category_index_from_labelmap(
        PATH_TO_LABELS, use_display_name=True)
    cap = cv2.VideoCapture(input_video_path)
    frame_count = 0
    MIN_SCORE_THRESH = .5
    trackers = []
    boxes = None
    scores = None

    with detection_graph.as_default():
        with tf.Session() as sess:
            # Get handles to input and output tensors
            image_tensor, tensor_dict = get_image_tensor()
            while cap.isOpened():
                # Read the frame
                ret, frame = cap.read()
                if not ret:
                    break
                frame_count += 1
                image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                if frame_count % detection_rate == 0 or boxes is None:
                    # Run inference
                    output_dict = sess.run(
                        tensor_dict,
                        feed_dict={image_tensor: np.expand_dims(image, 0)})
                    output_dict = get_output_dict(output_dict)
                    # Discard stale trackers; boxes and scores are replaced below
                    # (they are None on the first pass, so calling clear() on
                    # them would crash).
                    trackers.clear()
                    boxes, scores = get_eligible_boxes_scores(
                        output_dict, MIN_SCORE_THRESH)
                else:
                    # use trackers
                    if len(trackers) == 0 and len(boxes) > 0:
                        # No trackers yet but boxes are present, so initialize
                        # one tracker per detected box.
                        for i in range(len(scores)):
                            trackers.append(
                                Tracker(cv2.TrackerCSRT_create, frame, boxes[i]))
                    else:
                        # We have trackers, so update them.
                        for i in range(len(trackers)):
                            success, box = trackers[i].update(frame)
                            if success:
                                boxes[i] = box

                # Visualization of the results of a detection.
                # note: perform the detections using a higher threshold
                vis_util.visualize_boxes_and_labels_on_image_array(
                    frame,
                    boxes,
                    output_dict['detection_classes'],
                    scores,
                    category_index,
                    line_thickness=6,
                    skip_labels=True,
                    skip_scores=True,
                    use_normalized_coordinates=True,
                    min_score_thresh=MIN_SCORE_THRESH)
                cv2.imshow('frame', frame)
                # output_rgb = cv2.cvtColor(color_frame, cv2.COLOR_RGB2BGR)
                # out.write(output_rgb)
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break

    # out.release()
    cap.release()
    cv2.destroyAllWindows()
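# The Tracker class used above is not defined in this snippet; a minimal
# sketch matching the call sites (constructor takes a factory, a frame and a
# box; update() returns (success, box)). OpenCV trackers expect a pixel
# (x, y, w, h) tuple, so whether boxes need converting from normalized
# [ymin, xmin, ymax, xmax] depends on what get_eligible_boxes_scores returns;
# pixel (x, y, w, h) is assumed here.
class Tracker:
    def __init__(self, tracker_factory, frame, box):
        self.tracker = tracker_factory()
        self.tracker.init(frame, tuple(int(v) for v in box))

    def update(self, frame):
        # Returns (success, box) straight from the underlying OpenCV tracker.
        return self.tracker.update(frame)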
def _load_label_map(self): category_index = label_map_util.create_category_index_from_labelmap( self.PATH_TO_LABELS, use_display_name=True) return category_index
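# What the returned index looks like, shown with a direct call; the path is a
# placeholder and the values are illustrative for the COCO label map.
from object_detection.utils import label_map_util

category_index = label_map_util.create_category_index_from_labelmap(
    'mscoco_label_map.pbtxt', use_display_name=True)
print(category_index[1])          # {'id': 1, 'name': 'person'}
print(category_index[1]['name'])  # 'person'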
with open(save_path, "w+") as f: json.dump(small_test_list, f, indent=4) # for determining if filtered dataset should be created copy_small_dataset = False # strings with used paths base_photos_dir = "Photos" openimages_test = "Photos/test" tf_openimages_labels = "data/oid_v4_label_map.pbtxt" csv_annotations_path = "data/datasets_annotations/test-annotations-bbox.csv" full_annotations_json = "data/datasets_annotations/openimages_test.json" small_test_json = "data/openimages_test_small_ground_truth.json" small_test = "Photos/openimages_test_small" if not os.path.exists(small_test): os.makedirs(small_test) # category indices for both display object names and actual object names display_names = label_map_util.create_category_index_from_labelmap(tf_openimages_labels, use_display_name=True) true_names = label_map_util.create_category_index_from_labelmap(tf_openimages_labels, use_display_name=False) # merge them into combined dictionary category_dict = get_full_category_index_dict(true_names, display_names) # create JSON files reformat_openimages_annotations(csv_annotations_path, base_photos_dir, category_dict, full_annotations_json) filter_json_annotations(full_annotations_json, small_test, small_test_json, base_photos_dir) # create small test dataset if specified if copy_small_dataset: reduce_dataset_size(10, openimages_test, small_test, full_annotations_json)
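# get_full_category_index_dict is called above but not defined in this
# snippet; a sketch of one plausible implementation, merging the two category
# indices into one dict keyed by class id that carries both the OpenImages
# label (e.g. '/m/01g317') and its display name (e.g. 'Person').
def get_full_category_index_dict(true_names, display_names):
    merged = {}
    for class_id, item in true_names.items():
        merged[class_id] = {
            'id': class_id,
            'name': item['name'],
            'display_name': display_names[class_id]['name'],
        }
    return merged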
APIMODEL_PATH = 'Tensorflow/models' ANNOTATION_PATH = WORKSPACE_PATH + '/annotations' IMAGE_PATH = WORKSPACE_PATH + '/images' MODEL_PATH = WORKSPACE_PATH + '/models' PRETRAINED_MODEL_PATH = WORKSPACE_PATH + '/pre-trained-models' CONFIG_PATH = MODEL_PATH + '/my_ssd_mobnet/pipeline.config' CHECKPOINT_PATH = MODEL_PATH + '/my_ssd_mobnet/' # Load pipeline config and build a detection model configs = config_util.get_configs_from_pipeline_file(CONFIG_PATH) detection_model = model_builder.build(model_config=configs['model'], is_training=False) # Restore checkpoint ckpt = tf.compat.v2.train.Checkpoint(model=detection_model) ckpt.restore(os.path.join(CHECKPOINT_PATH, 'ckpt-2')).expect_partial() category_index = label_map_util.create_category_index_from_labelmap( ANNOTATION_PATH + '/label_map.pbtxt') # Setup capture cap = cv2.VideoCapture(0) width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) while True: ret, frame = cap.read() image_np = np.array(frame) input_tensor = tf.convert_to_tensor(np.expand_dims(image_np, 0), dtype=tf.float32) detections = detect_fn(input_tensor) num_detections = int(detections.pop('num_detections'))
return output_dict if __name__ == "__main__": start = time.perf_counter() detection_graph = tf.Graph() with detection_graph.as_default(): od_graph_def = tf.GraphDef() with tf.gfile.GFile(PATH_TO_FROZEN_GRAPH, "rb") as fid: serialized_graph = fid.read() od_graph_def.ParseFromString(serialized_graph) tf.import_graph_def(od_graph_def, name="") category_index = label_map_util.create_category_index_from_labelmap( LABEL_MAP_PATH, use_display_name=True) image = Image.open(IMAGE_PATH) image_np = np.asarray(image) image_np.setflags(write=True) with detection_graph.as_default(): tf_config = tf.ConfigProto() tf_config.gpu_options.allow_growth = True sess = tf.Session(config=tf_config) output_dict = run_inference_for_single_image(image, sess) visualization_utils.visualize_boxes_and_labels_on_image_array( image_np, output_dict["detection_boxes"], output_dict["detection_classes"],
def __init__(self): self.category_index = label_map_util.create_category_index_from_labelmap(PATH_TO_LABELS)
print(f'Request returned\n') # Post process output print(f'\n\nPost-processing server response...\n') image = Image.open(image_path).convert("RGB") image_np = load_image_into_numpy_array(image) output_dict = post_process(server_response, image_np.shape) print(f'Post-processing done!\n') # Save output on disk print(f'\n\nSaving output to {output_json}\n\n') save_result(server_response, output_json) if save_output_image: # Save output on disk category_index = label_map_util.create_category_index_from_labelmap( path_to_labels, use_display_name=True) # Visualization of the results of a detection. vis_util.visualize_boxes_and_labels_on_image_array( image_np, output_dict['detection_boxes'], output_dict['detection_classes'], output_dict['detection_scores'], category_index, instance_masks=output_dict.get('detection_masks'), use_normalized_coordinates=True, line_thickness=8, ) Image.fromarray(image_np).save(output_image) print('\n\nImage saved\n\n')
def mask_inference_image_cv(self):
    # Read and preprocess the image
    img = cv.imread(self.__path_to_images)
    image_np = img[:, :, [2, 1, 0]]  # BGR2RGB
    # Convert the image to a batched uint8 tensor
    input_tensor = tf.convert_to_tensor(np.expand_dims(image_np, 0),
                                        dtype=tf.uint8)
    # Apply inference
    detections = self.__model(input_tensor)
    image_np_with_detections = image_np.copy()
    try:
        from object_detection.utils import ops as utils_ops
        from object_detection.utils import label_map_util
        from object_detection.utils import visualization_utils as viz_utils
        category_index = label_map_util.create_category_index_from_labelmap(
            self.__path_to_labels, use_display_name=True)
        label_id_offset = 0
        if 'detection_masks' in detections:
            detection_masks = tf.convert_to_tensor(
                detections['detection_masks'][0])
            detection_boxes = tf.convert_to_tensor(
                detections['detection_boxes'][0])
            # Reframe the bbox masks to the image size.
            detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(
                detection_masks, detection_boxes, image_np.shape[0],
                image_np.shape[1])
            detection_masks_reframed = tf.cast(
                detection_masks_reframed > 0.5, tf.uint8)
            detections['detection_masks_reframed'] = (
                detection_masks_reframed.numpy())
        viz_utils.visualize_boxes_and_labels_on_image_array(
            image_np_with_detections,
            detections['detection_boxes'][0].numpy(),
            (detections['detection_classes'][0].numpy() +
             label_id_offset).astype(int),
            detections['detection_scores'][0].numpy(),
            category_index,
            instance_masks=detections.get('detection_masks_reframed', None),
            use_normalized_coordinates=True,
            max_boxes_to_draw=100,
            min_score_thresh=self.__threshold,
            agnostic_mode=False,
            line_thickness=2)
    except ImportError:
        click.echo(
            click.style(
                "\n The mask will not be applied: the Object Detection API is not available \n",
                bold=True, fg='red'))
    classIds = detections['detection_classes'][0].numpy()
    scores = detections['detection_scores'][0].numpy()
    boxes = detections['detection_boxes'][0].numpy()
    masks = detections['detection_masks'][0].numpy()
    # Visualize detected bounding boxes.
    for i in range(boxes.shape[0]):
        if scores[i] > self.__threshold:
            score = scores[i]
            bbox = boxes[i]
            instance_mask = masks[i]
            classId = classIds[i]
            img = self.visualize_bbox_mask_pil(
                image_np_with_detections, score, bbox, instance_mask, classId)
    img_path = os.path.join(PATH_DIR_IMAGE_INF,
                            self.__images_name_prefix + "_savedmodel_.png")
    cv.imwrite(img_path, image_np_with_detections)
    cv.imshow('TensorFlow Mask-ResNet_new', image_np_with_detections)
    cv.waitKey(0)
    print('Done')
def model_fn(features, labels, mode, params=None): """Constructs the object detection model. Args: features: Dictionary of feature tensors, returned from `input_fn`. labels: Dictionary of groundtruth tensors if mode is TRAIN or EVAL, otherwise None. mode: Mode key from tf.estimator.ModeKeys. params: Parameter dictionary passed from the estimator. Returns: An `EstimatorSpec` that encapsulates the model and its serving configurations. """ params = params or {} total_loss, train_op, detections, export_outputs = None, None, None, None is_training = mode == tf.estimator.ModeKeys.TRAIN detection_model = detection_model_fn(is_training=is_training, add_summaries=(not use_tpu)) scaffold_fn = None if mode == tf.estimator.ModeKeys.TRAIN: labels = unstack_batch(labels, unpad_groundtruth_tensors=train_config. unpad_groundtruth_tensors) elif mode == tf.estimator.ModeKeys.EVAL: # For evaling on train data, it is necessary to check whether groundtruth # must be unpadded. boxes_shape = (labels[fields.InputDataFields.groundtruth_boxes]. get_shape().as_list()) unpad_groundtruth_tensors = True if boxes_shape[ 1] is not None else False labels = unstack_batch( labels, unpad_groundtruth_tensors=unpad_groundtruth_tensors) if mode in (tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL): gt_boxes_list = labels[fields.InputDataFields.groundtruth_boxes] gt_classes_list = labels[ fields.InputDataFields.groundtruth_classes] gt_masks_list = None if fields.InputDataFields.groundtruth_instance_masks in labels: gt_masks_list = labels[ fields.InputDataFields.groundtruth_instance_masks] gt_keypoints_list = None if fields.InputDataFields.groundtruth_keypoints in labels: gt_keypoints_list = labels[ fields.InputDataFields.groundtruth_keypoints] detection_model.provide_groundtruth( groundtruth_boxes_list=gt_boxes_list, groundtruth_classes_list=gt_classes_list, groundtruth_masks_list=gt_masks_list, groundtruth_keypoints_list=gt_keypoints_list, groundtruth_weights_list=labels[ fields.InputDataFields.groundtruth_weights]) preprocessed_images = features[fields.InputDataFields.image] prediction_dict = detection_model.predict( preprocessed_images, features[fields.InputDataFields.true_image_shape]) detections = detection_model.postprocess( prediction_dict, features[fields.InputDataFields.true_image_shape]) if mode == tf.estimator.ModeKeys.TRAIN: if train_config.fine_tune_checkpoint and hparams.load_pretrained: if not train_config.fine_tune_checkpoint_type: # train_config.from_detection_checkpoint field is deprecated. For # backward compatibility, set train_config.fine_tune_checkpoint_type # based on train_config.from_detection_checkpoint. if train_config.from_detection_checkpoint: train_config.fine_tune_checkpoint_type = 'detection' else: train_config.fine_tune_checkpoint_type = 'classification' asg_map = detection_model.restore_map( fine_tune_checkpoint_type=train_config. 
                fine_tune_checkpoint_type,
                load_all_detection_checkpoint_vars=(
                    train_config.load_all_detection_checkpoint_vars))
            available_var_map = (
                variables_helper.get_variables_available_in_checkpoint(
                    asg_map,
                    train_config.fine_tune_checkpoint,
                    include_global_step=False))
            if use_tpu:

                def tpu_scaffold():
                    tf.train.init_from_checkpoint(
                        train_config.fine_tune_checkpoint, available_var_map)
                    return tf.train.Scaffold()

                scaffold_fn = tpu_scaffold
            else:
                tf.train.init_from_checkpoint(
                    train_config.fine_tune_checkpoint, available_var_map)

    if mode in (tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL):
        losses_dict = detection_model.loss(
            prediction_dict, features[fields.InputDataFields.true_image_shape])
        # Note: dict.itervalues()/iteritems() are Python 2 only; use the
        # Python 3 equivalents.
        losses = [loss_tensor for loss_tensor in losses_dict.values()]
        if train_config.add_regularization_loss:
            regularization_losses = tf.get_collection(
                tf.GraphKeys.REGULARIZATION_LOSSES)
            if regularization_losses:
                regularization_loss = tf.add_n(regularization_losses,
                                               name='regularization_loss')
                losses.append(regularization_loss)
                losses_dict['Loss/regularization_loss'] = regularization_loss
        total_loss = tf.add_n(losses, name='total_loss')
        losses_dict['Loss/total_loss'] = total_loss

    if mode in [tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL]:
        # TODO(rathodv): Stop creating optimizer summary vars in EVAL mode once we
        # can write learning rate summaries on TPU without host calls.
        global_step = tf.train.get_or_create_global_step()
        training_optimizer, optimizer_summary_vars = optimizer_builder.build(
            train_config.optimizer)

    if mode == tf.estimator.ModeKeys.TRAIN:
        if use_tpu:
            training_optimizer = tf.contrib.tpu.CrossShardOptimizer(
                training_optimizer)

        # Optionally freeze some layers by setting their gradients to be zero.
        trainable_variables = None
        if train_config.freeze_variables:
            trainable_variables = tf.contrib.framework.filter_variables(
                tf.trainable_variables(),
                exclude_patterns=train_config.freeze_variables)
        clip_gradients_value = None
        if train_config.gradient_clipping_by_norm > 0:
            clip_gradients_value = train_config.gradient_clipping_by_norm

        if not use_tpu:
            for var in optimizer_summary_vars:
                tf.summary.scalar(var.op.name, var)
        summaries = [] if use_tpu else None
        train_op = tf.contrib.layers.optimize_loss(
            loss=total_loss,
            global_step=global_step,
            learning_rate=None,
            clip_gradients=clip_gradients_value,
            optimizer=training_optimizer,
            variables=trainable_variables,
            summaries=summaries,
            name='')  # Preventing scope prefix on all variables.

    if mode == tf.estimator.ModeKeys.PREDICT:
        export_outputs = {
            tf.saved_model.signature_constants.PREDICT_METHOD_NAME:
                tf.estimator.export.PredictOutput(detections)
        }

    eval_metric_ops = None
    if mode in (tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL):
        class_agnostic = (fields.DetectionResultFields.detection_classes
                          not in detections)
        groundtruth = _get_groundtruth_data(detection_model, class_agnostic)
        use_original_images = fields.InputDataFields.original_image in features
        original_images = (features[fields.InputDataFields.original_image]
                           if use_original_images else
                           features[fields.InputDataFields.image])
        eval_dict = eval_util.result_dict_for_single_example(
            original_images[0:1],
            features[inputs.HASH_KEY][0],
            detections,
            groundtruth,
            class_agnostic=class_agnostic,
            scale_to_absolute=False)

        if class_agnostic:
            category_index = label_map_util.create_class_agnostic_category_index()
        else:
            category_index = label_map_util.create_category_index_from_labelmap(
                eval_input_config.label_map_path)

        img_summary = None
        if not use_tpu and use_original_images:
            detection_and_groundtruth = (
                vis_utils.draw_side_by_side_evaluation_image(
                    eval_dict,
                    category_index,
                    max_boxes_to_draw=20,
                    min_score_thresh=0.2))
            img_summary = tf.summary.image('Detections_Left_Groundtruth_Right',
                                           detection_and_groundtruth)

        if mode == tf.estimator.ModeKeys.EVAL:
            # Eval metrics on a single example.
            eval_metrics = eval_config.metrics_set
            if not eval_metrics:
                eval_metrics = ['coco_detection_metrics']
            eval_metric_ops = eval_util.get_eval_metric_ops_for_evaluators(
                eval_metrics,
                category_index.values(),
                eval_dict,
                include_metrics_per_category=False)
            for loss_key, loss_tensor in iter(losses_dict.items()):
                eval_metric_ops[loss_key] = tf.metrics.mean(loss_tensor)
            for var in optimizer_summary_vars:
                eval_metric_ops[var.op.name] = (var, tf.no_op())
            if img_summary is not None:
                eval_metric_ops['Detections_Left_Groundtruth_Right'] = (
                    img_summary, tf.no_op())
            eval_metric_ops = {str(k): v for k, v in eval_metric_ops.items()}

    if use_tpu:
        return tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            scaffold_fn=scaffold_fn,
            predictions=detections,
            loss=total_loss,
            train_op=train_op,
            eval_metrics=eval_metric_ops,
            export_outputs=export_outputs)
    else:
        return tf.estimator.EstimatorSpec(
            mode=mode,
            predictions=detections,
            loss=total_loss,
            train_op=train_op,
            eval_metric_ops=eval_metric_ops,
            export_outputs=export_outputs)
def model_fn(features, labels, mode, params=None): """Constructs the object detection model. Args: features: Dictionary of feature tensors, returned from `input_fn`. labels: Dictionary of groundtruth tensors if mode is TRAIN or EVAL, otherwise None. mode: Mode key from tf.estimator.ModeKeys. params: Parameter dictionary passed from the estimator. Returns: An `EstimatorSpec` that encapsulates the model and its serving configurations. """ params = params or {} total_loss, train_op, detections, export_outputs = None, None, None, None is_training = mode == tf.estimator.ModeKeys.TRAIN # Make sure to set the Keras learning phase. True during training, # False for inference. tf.keras.backend.set_learning_phase(is_training) detection_model = detection_model_fn( is_training=is_training, add_summaries=(not use_tpu)) scaffold_fn = None if mode == tf.estimator.ModeKeys.TRAIN: labels = unstack_batch( labels, unpad_groundtruth_tensors=train_config.unpad_groundtruth_tensors) elif mode == tf.estimator.ModeKeys.EVAL: # For evaling on train data, it is necessary to check whether groundtruth # must be unpadded. boxes_shape = ( labels[fields.InputDataFields.groundtruth_boxes].get_shape() .as_list()) unpad_groundtruth_tensors = boxes_shape[1] is not None and not use_tpu labels = unstack_batch( labels, unpad_groundtruth_tensors=unpad_groundtruth_tensors) if mode in (tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL): gt_boxes_list = labels[fields.InputDataFields.groundtruth_boxes] gt_classes_list = labels[fields.InputDataFields.groundtruth_classes] gt_masks_list = None if fields.InputDataFields.groundtruth_instance_masks in labels: gt_masks_list = labels[ fields.InputDataFields.groundtruth_instance_masks] gt_keypoints_list = None if fields.InputDataFields.groundtruth_keypoints in labels: gt_keypoints_list = labels[fields.InputDataFields.groundtruth_keypoints] gt_weights_list = None if fields.InputDataFields.groundtruth_weights in labels: gt_weights_list = labels[fields.InputDataFields.groundtruth_weights] gt_confidences_list = None if fields.InputDataFields.groundtruth_confidences in labels: gt_confidences_list = labels[ fields.InputDataFields.groundtruth_confidences] gt_is_crowd_list = None if fields.InputDataFields.groundtruth_is_crowd in labels: gt_is_crowd_list = labels[fields.InputDataFields.groundtruth_is_crowd] detection_model.provide_groundtruth( groundtruth_boxes_list=gt_boxes_list, groundtruth_classes_list=gt_classes_list, groundtruth_confidences_list=gt_confidences_list, groundtruth_masks_list=gt_masks_list, groundtruth_keypoints_list=gt_keypoints_list, groundtruth_weights_list=gt_weights_list, groundtruth_is_crowd_list=gt_is_crowd_list) preprocessed_images = features[fields.InputDataFields.image] if use_tpu and train_config.use_bfloat16: with tf.contrib.tpu.bfloat16_scope(): prediction_dict = detection_model.predict( preprocessed_images, features[fields.InputDataFields.true_image_shape]) for k, v in prediction_dict.items(): if v.dtype == tf.bfloat16: prediction_dict[k] = tf.cast(v, tf.float32) else: prediction_dict = detection_model.predict( preprocessed_images, features[fields.InputDataFields.true_image_shape]) def postprocess_wrapper(args): return detection_model.postprocess(args[0], args[1]) if mode in (tf.estimator.ModeKeys.EVAL, tf.estimator.ModeKeys.PREDICT): if use_tpu and postprocess_on_cpu: detections = tf.contrib.tpu.outside_compilation( postprocess_wrapper, (prediction_dict, features[fields.InputDataFields.true_image_shape])) else: detections = postprocess_wrapper(( prediction_dict, 
features[fields.InputDataFields.true_image_shape])) if mode == tf.estimator.ModeKeys.TRAIN: if train_config.fine_tune_checkpoint and hparams.load_pretrained: if not train_config.fine_tune_checkpoint_type: # train_config.from_detection_checkpoint field is deprecated. For # backward compatibility, set train_config.fine_tune_checkpoint_type # based on train_config.from_detection_checkpoint. if train_config.from_detection_checkpoint: train_config.fine_tune_checkpoint_type = 'detection' else: train_config.fine_tune_checkpoint_type = 'classification' asg_map = detection_model.restore_map( fine_tune_checkpoint_type=train_config.fine_tune_checkpoint_type, load_all_detection_checkpoint_vars=( train_config.load_all_detection_checkpoint_vars)) available_var_map = ( variables_helper.get_variables_available_in_checkpoint( asg_map, train_config.fine_tune_checkpoint, include_global_step=False)) if use_tpu: def tpu_scaffold(): tf.train.init_from_checkpoint(train_config.fine_tune_checkpoint, available_var_map) return tf.train.Scaffold() scaffold_fn = tpu_scaffold else: tf.train.init_from_checkpoint(train_config.fine_tune_checkpoint, available_var_map) if mode in (tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL): losses_dict = detection_model.loss( prediction_dict, features[fields.InputDataFields.true_image_shape]) losses = [loss_tensor for loss_tensor in losses_dict.values()] if train_config.add_regularization_loss: regularization_losses = detection_model.regularization_losses() if regularization_losses: regularization_loss = tf.add_n( regularization_losses, name='regularization_loss') losses.append(regularization_loss) losses_dict['Loss/regularization_loss'] = regularization_loss total_loss = tf.add_n(losses, name='total_loss') losses_dict['Loss/total_loss'] = total_loss if 'graph_rewriter_config' in configs: graph_rewriter_fn = graph_rewriter_builder.build( configs['graph_rewriter_config'], is_training=is_training) graph_rewriter_fn() # TODO(rathodv): Stop creating optimizer summary vars in EVAL mode once we # can write learning rate summaries on TPU without host calls. global_step = tf.train.get_or_create_global_step() training_optimizer, optimizer_summary_vars = optimizer_builder.build( train_config.optimizer) if mode == tf.estimator.ModeKeys.TRAIN: if use_tpu: training_optimizer = tf.contrib.tpu.CrossShardOptimizer( training_optimizer) # Optionally freeze some layers by setting their gradients to be zero. trainable_variables = None include_variables = ( train_config.update_trainable_variables if train_config.update_trainable_variables else None) exclude_variables = ( train_config.freeze_variables if train_config.freeze_variables else None) trainable_variables = tf.contrib.framework.filter_variables( tf.trainable_variables(), include_patterns=include_variables, exclude_patterns=exclude_variables) clip_gradients_value = None if train_config.gradient_clipping_by_norm > 0: clip_gradients_value = train_config.gradient_clipping_by_norm if not use_tpu: for var in optimizer_summary_vars: tf.summary.scalar(var.op.name, var) summaries = [] if use_tpu else None if train_config.summarize_gradients: summaries = ['gradients', 'gradient_norm', 'global_gradient_norm'] train_op = tf.contrib.layers.optimize_loss( loss=total_loss, global_step=global_step, learning_rate=None, clip_gradients=clip_gradients_value, optimizer=training_optimizer, update_ops=detection_model.updates(), variables=trainable_variables, summaries=summaries, name='') # Preventing scope prefix on all variables. 
if mode == tf.estimator.ModeKeys.PREDICT: exported_output = exporter_lib.add_output_tensor_nodes(detections) export_outputs = { tf.saved_model.signature_constants.PREDICT_METHOD_NAME: tf.estimator.export.PredictOutput(exported_output) } eval_metric_ops = None scaffold = None if mode == tf.estimator.ModeKeys.EVAL: class_agnostic = ( fields.DetectionResultFields.detection_classes not in detections) groundtruth = _prepare_groundtruth_for_eval( detection_model, class_agnostic, eval_input_config.max_number_of_boxes) use_original_images = fields.InputDataFields.original_image in features if use_original_images: eval_images = features[fields.InputDataFields.original_image] true_image_shapes = tf.slice( features[fields.InputDataFields.true_image_shape], [0, 0], [-1, 3]) original_image_spatial_shapes = features[fields.InputDataFields .original_image_spatial_shape] else: eval_images = features[fields.InputDataFields.image] true_image_shapes = None original_image_spatial_shapes = None eval_dict = eval_util.result_dict_for_batched_example( eval_images, features[inputs.HASH_KEY], detections, groundtruth, class_agnostic=class_agnostic, scale_to_absolute=True, original_image_spatial_shapes=original_image_spatial_shapes, true_image_shapes=true_image_shapes) if class_agnostic: category_index = label_map_util.create_class_agnostic_category_index() else: category_index = label_map_util.create_category_index_from_labelmap( eval_input_config.label_map_path) vis_metric_ops = None if not use_tpu and use_original_images: eval_metric_op_vis = vis_utils.VisualizeSingleFrameDetections( category_index, max_examples_to_draw=eval_config.num_visualizations, max_boxes_to_draw=eval_config.max_num_boxes_to_visualize, min_score_thresh=eval_config.min_score_threshold, use_normalized_coordinates=False) vis_metric_ops = eval_metric_op_vis.get_estimator_eval_metric_ops( eval_dict) # Eval metrics on a single example. eval_metric_ops = eval_util.get_eval_metric_ops_for_evaluators( eval_config, list(category_index.values()), eval_dict) for loss_key, loss_tensor in iter(losses_dict.items()): eval_metric_ops[loss_key] = tf.metrics.mean(loss_tensor) for var in optimizer_summary_vars: eval_metric_ops[var.op.name] = (var, tf.no_op()) if vis_metric_ops is not None: eval_metric_ops.update(vis_metric_ops) eval_metric_ops = {str(k): v for k, v in eval_metric_ops.items()} if eval_config.use_moving_averages: variable_averages = tf.train.ExponentialMovingAverage(0.0) variables_to_restore = variable_averages.variables_to_restore() keep_checkpoint_every_n_hours = ( train_config.keep_checkpoint_every_n_hours) saver = tf.train.Saver( variables_to_restore, keep_checkpoint_every_n_hours=keep_checkpoint_every_n_hours) scaffold = tf.train.Scaffold(saver=saver) # EVAL executes on CPU, so use regular non-TPU EstimatorSpec. 
if use_tpu and mode != tf.estimator.ModeKeys.EVAL: return tf.contrib.tpu.TPUEstimatorSpec( mode=mode, scaffold_fn=scaffold_fn, predictions=detections, loss=total_loss, train_op=train_op, eval_metrics=eval_metric_ops, export_outputs=export_outputs) else: if scaffold is None: keep_checkpoint_every_n_hours = ( train_config.keep_checkpoint_every_n_hours) saver = tf.train.Saver( sharded=True, keep_checkpoint_every_n_hours=keep_checkpoint_every_n_hours, save_relative_paths=True) tf.add_to_collection(tf.GraphKeys.SAVERS, saver) scaffold = tf.train.Scaffold(saver=saver) return tf.estimator.EstimatorSpec( mode=mode, predictions=detections, loss=total_loss, train_op=train_op, eval_metric_ops=eval_metric_ops, export_outputs=export_outputs, scaffold=scaffold)
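# In the Estimator workflow a model_fn like the one above (which closes over
# train_config, eval_config, etc. in its enclosing scope) is handed to an
# Estimator. A hedged sketch; model_dir and the input_fns are placeholders.
import tensorflow as tf

run_config = tf.estimator.RunConfig(model_dir='/tmp/od_model')
estimator = tf.estimator.Estimator(model_fn=model_fn, config=run_config)
# estimator.train(input_fn=train_input_fn, max_steps=train_config.num_steps)
# metrics = estimator.evaluate(input_fn=eval_input_fn)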