Example #1
    def __init__(self):
        logger.info('Loading Tensorflow Detection API')

        weights_path = get_file(config.SSD_INCEPTION_FILENAME, config.SSD_INCEPTION_URL,
                                cache_dir=os.path.abspath(config.WEIGHT_PATH),
                                cache_subdir='models')

        extract_path = weights_path.replace('.tar.gz', '')
        if not os.path.exists(extract_path):
            tar = tarfile.open(weights_path, "r:gz")
            tar.extractall(path=os.path.join(config.WEIGHT_PATH, 'models'))
            tar.close()
        pb_path = os.path.join(extract_path, self.PB_NAME)

        self.graph = tf.Graph()
        with self.graph.as_default():
            od_graph_def = tf.GraphDef()
            with tf.gfile.GFile(pb_path, 'rb') as fid:
                serialized_graph = fid.read()
                od_graph_def.ParseFromString(serialized_graph)
                tf.import_graph_def(od_graph_def, name='')

        self.label_map = label_map_util.load_labelmap(self.PATH_TO_LABELS)
        self.categories = label_map_util.convert_label_map_to_categories(self.label_map,
                                                                         max_num_classes=self.NUM_CLASSES,
                                                                         use_display_name=True)
        self.category_index = label_map_util.create_category_index(self.categories)
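A plausible continuation of the constructor above (not part of the original snippet; the tensor names follow the exported-detection-graph convention used in the other examples) would open a session on the loaded graph and cache the standard tensor handles:

        # Hypothetical continuation: open a session on the loaded graph and
        # cache handles to the standard detection tensors.
        self.session = tf.Session(graph=self.graph)
        self.image_tensor = self.graph.get_tensor_by_name('image_tensor:0')
        self.boxes = self.graph.get_tensor_by_name('detection_boxes:0')
        self.scores = self.graph.get_tensor_by_name('detection_scores:0')
        self.classes = self.graph.get_tensor_by_name('detection_classes:0')
        self.num_detections = self.graph.get_tensor_by_name('num_detections:0')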
Example #2
def main(unused_argv):
  assert FLAGS.checkpoint_dir, '`checkpoint_dir` is missing.'
  assert FLAGS.eval_dir, '`eval_dir` is missing.'
  tf.gfile.MakeDirs(FLAGS.eval_dir)
  if FLAGS.pipeline_config_path:
    configs = config_util.get_configs_from_pipeline_file(
        FLAGS.pipeline_config_path)
    tf.gfile.Copy(FLAGS.pipeline_config_path,
                  os.path.join(FLAGS.eval_dir, 'pipeline.config'),
                  overwrite=True)
  else:
    configs = config_util.get_configs_from_multiple_files(
        model_config_path=FLAGS.model_config_path,
        eval_config_path=FLAGS.eval_config_path,
        eval_input_config_path=FLAGS.input_config_path)
    for name, config in [('model.config', FLAGS.model_config_path),
                         ('eval.config', FLAGS.eval_config_path),
                         ('input.config', FLAGS.input_config_path)]:
      tf.gfile.Copy(config,
                    os.path.join(FLAGS.eval_dir, name),
                    overwrite=True)

  model_config = configs['model']
  eval_config = configs['eval_config']
  input_config = configs['eval_input_config']
  if FLAGS.eval_training_data:
    input_config = configs['train_input_config']

  model_fn = functools.partial(
      model_builder.build,
      model_config=model_config,
      is_training=False)

  def get_next(config):
    return dataset_util.make_initializable_iterator(
        dataset_builder.build(config)).get_next()

  create_input_dict_fn = functools.partial(get_next, input_config)

  label_map = label_map_util.load_labelmap(input_config.label_map_path)
  max_num_classes = max([item.id for item in label_map.item])
  categories = label_map_util.convert_label_map_to_categories(
      label_map, max_num_classes)

  if FLAGS.run_once:
    eval_config.max_evals = 1

  graph_rewriter_fn = None
  if 'graph_rewriter_config' in configs:
    graph_rewriter_fn = graph_rewriter_builder.build(
        configs['graph_rewriter_config'], is_training=False)

  evaluator.evaluate(
      create_input_dict_fn,
      model_fn,
      eval_config,
      categories,
      FLAGS.checkpoint_dir,
      FLAGS.eval_dir,
      graph_hook_fn=graph_rewriter_fn)
Example #3
 def test_keep_categories_with_unique_id(self):
   label_map_proto = string_int_label_map_pb2.StringIntLabelMap()
   label_map_string = """
     item {
       id:2
       name:'cat'
     }
     item {
       id:1
       name:'child'
     }
     item {
       id:1
       name:'person'
     }
     item {
       id:1
       name:'n00007846'
     }
   """
   text_format.Merge(label_map_string, label_map_proto)
   categories = label_map_util.convert_label_map_to_categories(
       label_map_proto, max_num_classes=3)
   self.assertListEqual([{
       'id': 2,
       'name': u'cat'
   }, {
       'id': 1,
       'name': u'child'
   }], categories)
Example #4
def read_data_and_evaluate(input_config, eval_config):
  """Reads pre-computed object detections and groundtruth from tf_record.

  Args:
    input_config: input config proto of type
      object_detection.protos.InputReader.
    eval_config: evaluation config proto of type
      object_detection.protos.EvalConfig.

  Returns:
    Evaluated detections metrics.

  Raises:
    ValueError: if input_reader type is not supported or metric type is unknown.
  """
  if input_config.WhichOneof('input_reader') == 'tf_record_input_reader':
    input_paths = input_config.tf_record_input_reader.input_path

    label_map = label_map_util.load_labelmap(input_config.label_map_path)
    max_num_classes = max([item.id for item in label_map.item])
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes)

    object_detection_evaluators = evaluator.get_evaluators(
        eval_config, categories)
    # Support a single evaluator
    object_detection_evaluator = object_detection_evaluators[0]

    skipped_images = 0
    processed_images = 0
    for input_path in _generate_filenames(input_paths):
      tf.logging.info('Processing file: {0}'.format(input_path))

      record_iterator = tf.python_io.tf_record_iterator(path=input_path)
      data_parser = tf_example_parser.TfExampleDetectionAndGTParser()

      for string_record in record_iterator:
        tf.logging.log_every_n(tf.logging.INFO, 'Processed %d images...', 1000,
                               processed_images)
        processed_images += 1

        example = tf.train.Example()
        example.ParseFromString(string_record)
        decoded_dict = data_parser.parse(example)

        if decoded_dict:
          object_detection_evaluator.add_single_ground_truth_image_info(
              decoded_dict[standard_fields.DetectionResultFields.key],
              decoded_dict)
          object_detection_evaluator.add_single_detected_image_info(
              decoded_dict[standard_fields.DetectionResultFields.key],
              decoded_dict)
        else:
          skipped_images += 1
          tf.logging.info('Skipped images: {0}'.format(skipped_images))

    return object_detection_evaluator.evaluate()

  raise ValueError('Unsupported input_reader_config.')
Example #5
def main(unused_argv):
  assert FLAGS.checkpoint_dir, '`checkpoint_dir` is missing.'
  assert FLAGS.eval_dir, '`eval_dir` is missing.'
  if FLAGS.pipeline_config_path:
    configs = config_util.get_configs_from_pipeline_file(
        FLAGS.pipeline_config_path)
  else:
    configs = config_util.get_configs_from_multiple_files(
        model_config_path=FLAGS.model_config_path,
        eval_config_path=FLAGS.eval_config_path,
        eval_input_config_path=FLAGS.input_config_path)

  pipeline_proto = config_util.create_pipeline_proto_from_configs(configs)
  config_text = text_format.MessageToString(pipeline_proto)
  tf.gfile.MakeDirs(FLAGS.eval_dir)
  with tf.gfile.Open(os.path.join(FLAGS.eval_dir, 'pipeline.config'),
                     'wb') as f:
    f.write(config_text)

  model_config = configs['model']
  lstm_config = configs['lstm_model']
  eval_config = configs['eval_config']
  input_config = configs['eval_input_config']

  if FLAGS.eval_training_data:
    input_config.external_input_reader.CopyFrom(
        configs['train_input_config'].external_input_reader)
    lstm_config.eval_unroll_length = lstm_config.train_unroll_length

  model_fn = functools.partial(
      model_builder.build,
      model_config=model_config,
      lstm_config=lstm_config,
      is_training=False)

  def get_next(config, model_config, lstm_config, unroll_length):
    return seq_dataset_builder.build(config, model_config, lstm_config,
                                     unroll_length)

  create_input_dict_fn = functools.partial(get_next, input_config, model_config,
                                           lstm_config,
                                           lstm_config.eval_unroll_length)

  label_map = label_map_util.load_labelmap(input_config.label_map_path)
  max_num_classes = max([item.id for item in label_map.item])
  categories = label_map_util.convert_label_map_to_categories(
      label_map, max_num_classes)

  if FLAGS.run_once:
    eval_config.max_evals = 1

  evaluator.evaluate(create_input_dict_fn, model_fn, eval_config, categories,
                     FLAGS.checkpoint_dir, FLAGS.eval_dir)
Example #6
 def test_convert_label_map_to_coco_categories_with_few_classes(self):
   label_map_proto = self._generate_label_map(num_classes=4)
   cat_no_offset = label_map_util.convert_label_map_to_categories(
       label_map_proto, max_num_classes=2)
   expected_categories_list = [{
       'name': u'1',
       'id': 1
   }, {
       'name': u'2',
       'id': 2
   }]
   self.assertListEqual(expected_categories_list, cat_no_offset)
Example #7
 def test_convert_label_map_to_categories_no_label_map(self):
   categories = label_map_util.convert_label_map_to_categories(
       None, max_num_classes=3)
   expected_categories_list = [{
       'name': u'category_1',
       'id': 1
   }, {
       'name': u'category_2',
       'id': 2
   }, {
       'name': u'category_3',
       'id': 3
   }]
   self.assertListEqual(expected_categories_list, categories)
Example #8
 def test_convert_label_map_to_categories(self):
   label_map_proto = self._generate_label_map(num_classes=4)
   categories = label_map_util.convert_label_map_to_categories(
       label_map_proto, max_num_classes=3)
   expected_categories_list = [{
       'name': u'1',
       'id': 1
   }, {
       'name': u'2',
       'id': 2
   }, {
       'name': u'3',
       'id': 3
   }]
   self.assertListEqual(expected_categories_list, categories)
Example #9
  def __init__(self):
    self.detection_graph = tf.Graph()
    with self.detection_graph.as_default():
      od_graph_def = tf.GraphDef()
      with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
        serialized_graph = fid.read()
        od_graph_def.ParseFromString(serialized_graph)
        tf.import_graph_def(od_graph_def, name='')

    label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
    categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
    category_index = label_map_util.create_category_index(categories)

    with self.detection_graph.as_default():
      # Get handles to input and output tensors
      ops = tf.get_default_graph().get_operations()
      all_tensor_names = {output.name for op in ops for output in op.outputs}
      tensor_dict = {}
      for key in [
          'num_detections', 'detection_boxes', 'detection_scores',
          'detection_classes', 'detection_masks'
      ]:
        tensor_name = key + ':0'
        if tensor_name in all_tensor_names:
          tensor_dict[key] = tf.get_default_graph().get_tensor_by_name(
              tensor_name)
      if 'detection_masks' in tensor_dict:
        # The following processing is only for single image
        detection_boxes = tf.squeeze(tensor_dict['detection_boxes'], [0])
        detection_masks = tf.squeeze(tensor_dict['detection_masks'], [0])
        # Reframe is required to translate mask from box coordinates to image coordinates and fit the image size.
        real_num_detection = tf.cast(tensor_dict['num_detections'][0], tf.int32)
        detection_boxes = tf.slice(detection_boxes, [0, 0], [real_num_detection, -1])
        detection_masks = tf.slice(detection_masks, [0, 0, 0], [real_num_detection, -1, -1])
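        # Note: `image` in the call below refers to an input image array that is
        # assumed to already be in scope when this constructor runs; it is not
        # defined in this snippet. Some variants derive the size from the image
        # tensor instead.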
        detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(
            detection_masks, detection_boxes, image.shape[0], image.shape[1])
        detection_masks_reframed = tf.cast(
            tf.greater(detection_masks_reframed, 0.5), tf.uint8)
        # Follow the convention by adding back the batch dimension
        tensor_dict['detection_masks'] = tf.expand_dims(
            detection_masks_reframed, 0)
      image_tensor = tf.get_default_graph().get_tensor_by_name('image_tensor:0')

    self.tensor_dict = tensor_dict
    self.image_tensor = image_tensor
    self.label_map = label_map
    self.category_index = category_index
    self.session = tf.Session(graph=self.detection_graph)
Example #10
def main(unused_argv):
  assert FLAGS.checkpoint_dir, '`checkpoint_dir` is missing.'
  assert FLAGS.eval_dir, '`eval_dir` is missing.'
  if FLAGS.pipeline_config_path:
    model_config, eval_config, input_config = get_configs_from_pipeline_file()
  else:
    model_config, eval_config, input_config = get_configs_from_multiple_files()

  model_fn = functools.partial(
      model_builder.build,
      model_config=model_config,
      is_training=False)

  create_input_dict_fn = functools.partial(
      input_reader_builder.build,
      input_config)

  label_map = label_map_util.load_labelmap(input_config.label_map_path)
  max_num_classes = max([item.id for item in label_map.item])
  categories = label_map_util.convert_label_map_to_categories(
      label_map, max_num_classes)

  evaluator.evaluate(create_input_dict_fn, model_fn, eval_config, categories,
                     FLAGS.checkpoint_dir, FLAGS.eval_dir)
Example #11
def get_label_index(label_path, num_classes):
    label_map = label_map_util.load_labelmap(label_path)
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes=num_classes, use_display_name=True)
    category_index = label_map_util.create_category_index(categories)
    return category_index
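A minimal usage sketch for the helper above (the label map path and class count are placeholders, not from the original source); the returned category index is a plain dict keyed by class id, so a predicted class id maps straight to a display name:

category_index = get_label_index('data/mscoco_label_map.pbtxt', num_classes=90)
predicted_class_id = 1
print(category_index[predicted_class_id]['name'])  # 'person' for the COCO label map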
Example #12
def main(args):
    my_flag = False
    svo_filepath = None
    if len(args) > 1:
        svo_filepath = args[1]


    rospy.init_node('Human')
    human_pub=rospy.Publisher('human_dis', String, queue_size=1)
    rate=rospy.Rate(10)

    # This main thread will run the object detection, the capture thread is loaded later

    # What model to download and load
    #MODEL_NAME = 'ssd_mobilenet_v1_coco_2018_01_28'
    MODEL_NAME = 'ssd_mobilenet_v1_fpn_shared_box_predictor_640x640_coco14_sync_2018_07_03'
    #MODEL_NAME = 'ssd_resnet50_v1_fpn_shared_box_predictor_640x640_coco14_sync_2018_07_03'
    #MODEL_NAME = 'ssd_mobilenet_v1_coco_2018_01_28'
    #MODEL_NAME = 'faster_rcnn_nas_coco_2018_01_28' # Accurate but heavy

    # Path to frozen detection graph. This is the actual model that is used for the object detection.
    PATH_TO_FROZEN_GRAPH = 'data/' + MODEL_NAME + '/frozen_inference_graph.pb'

    # Check if the model is already present
    if not os.path.isfile(PATH_TO_FROZEN_GRAPH):
        print("Downloading model " + MODEL_NAME + "...")

        MODEL_FILE = MODEL_NAME + '.tar.gz'
        MODEL_PATH = 'data/' + MODEL_NAME + '.tar.gz'
        DOWNLOAD_BASE = 'http://download.tensorflow.org/models/object_detection/'

        opener = urllib.request.URLopener()
        opener.retrieve(DOWNLOAD_BASE + MODEL_FILE, MODEL_PATH)
        tar_file = tarfile.open(MODEL_PATH)
        for file in tar_file.getmembers():
            file_name = os.path.basename(file.name)
            if 'frozen_inference_graph.pb' in file_name:
                tar_file.extract(file, 'data/')

    # List of the strings that is used to add correct label for each box.
    PATH_TO_LABELS = os.path.join('data', 'mscoco_label_map.pbtxt')
    NUM_CLASSES = 90

    # Start the capture thread with the ZED input
    print("Starting the ZED")
    capture_thread = Thread(target=capture_thread_func, kwargs={'svo_filepath': svo_filepath})
    capture_thread.start()
    # Shared resources
    global image_np_global, depth_np_global, new_data, exit_signal

    # Load a (frozen) Tensorflow model into memory.
    print("Loading model " + MODEL_NAME)
    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(PATH_TO_FROZEN_GRAPH, 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name='')

    # Limit to a maximum of 50% the GPU memory usage taken by TF https://www.tensorflow.org/guide/using_gpu
    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 0.5

    # Loading label map
    label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
    categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=NUM_CLASSES,
                                                                use_display_name=True)
    category_index = label_map_util.create_category_index(categories)

    # Detection
    with detection_graph.as_default():
        with tf.Session(config=config, graph=detection_graph) as sess:
            while not exit_signal:
                # Expand dimensions since the model expects images to have shape: [1, None, None, 3]
                if new_data:
                    lock.acquire()
                    image_np = np.copy(image_np_global)
                    depth_np = np.copy(depth_np_global)
                    new_data = False
                    lock.release()

                    image_np_expanded = np.expand_dims(image_np, axis=0)

                    image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
                    # Each box represents a part of the image where a particular object was detected.
                    boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
                    # Each score represents the level of confidence for each of the objects.
                    # Score is shown on the result image, together with the class label.
                    scores = detection_graph.get_tensor_by_name('detection_scores:0')
                    classes = detection_graph.get_tensor_by_name('detection_classes:0')

                    num_detections = detection_graph.get_tensor_by_name('num_detections:0')
                    # Actual detection.
                    (boxes, scores, classes, num_detections) = sess.run(
                        [boxes, scores, classes, num_detections],
                        feed_dict={image_tensor: image_np_expanded})

                    num_detections_ = num_detections.astype(int)[0]
                    # print(np.squeeze(scores))
                    # Visualization of the results of a detection.
                    image_np = display_objects_distances(
                        image_np,
                        depth_np,
                        num_detections_,
                        np.squeeze(boxes),
                        np.squeeze(classes).astype(np.int32),
                        np.squeeze(scores),
                        category_index,human_pub)

                    cv2.imshow('ZED object detection', cv2.resize(image_np, (width, height)))
                    if cv2.waitKey(10) & 0xFF == ord('q'):
                        cv2.destroyAllWindows()
                        exit_signal = True
                else:
                    sleep(0.01)

            sess.close()

    exit_signal = True
    capture_thread.join()
Example #13
if __name__ == "__main__":
    interpreter = interpreter_wrapper.Interpreter(model_path=model_file)
    interpreter.allocate_tensors()

    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()

    # check the type of the input tensor
    floating_model = False
    if input_details[0]['dtype'] == np.float32:
        floating_model = True

    labels = load_labels(label_file)

    label_map = label_map_util.load_labelmap(label_map_path)
    max_num_classes = max([item.id for item in label_map.item])
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes)

    evaluators = get_evaluators(categories)
    counters = {'skipped': 0, 'success': 0}

    images_paths, annnots_paths = get_all_examples(test_data_file_path)
    num_examples = len(images_paths)

    #try:
    for batch in range(num_examples):
        file_name = images_paths[batch]
        annot_name = annnots_paths[batch]
        img = cv2.imread(file_name)
        img_shape = img.shape

        if (batch + 1) % 100 == 0:
Example #14
def run_inference_graph_images(PATH_TO_FROZEN_GRAPH, PATH_TO_LABELS,
                               TEST_IMAGE_PATHS, min_threshold,
                               bb_outpath, PATH_TO_BB_HASHMAP, NUM_CLASSES=1):
    """This function takes in a list of image local-paths and runs it through the trained graph.
    Further, using the visualization function it draws bounding boxes on each of the images and 
    saves it in a local path.
    Arguments:
        PATH_TO_FROZEN_GRAPH - local path of the trained frozen graph, '/frozen_inference_graph.pb'
        PATH_TO_LABELS - local path of the labels (a mapping from class number to class name), 'label_map_focus.pbtxt'
        NUM_CLASSES - number of detection classes
        TEST_IMAGE_PATHS - list of test image local paths
        min_threshold - minimum score threshold for the bounding box to be considered
        bb_outpath - local path where to save the images with ounding poxes, /home/ubuntu/data/tensorflow/my_workspace/camera-trap-detection/snapshot-safari/snapshot-serengeti/subject_set_upload/
        PATH_TO_BB_HASHMAP - path where bounding box information for the subjects is saved
        
        """
    # Loading the frozen graph
    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(PATH_TO_FROZEN_GRAPH, 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name='')

    label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
    category_index = label_map_util.create_category_index(categories)

    bb_hashmap = {}
    for image_path in TEST_IMAGE_PATHS:
        image = Image.open(image_path)
        if (len(np.array(image).shape) == 3):
            # the array based representation of the image will be used later in order to prepare the
            # result image with boxes and labels on it.
            image_np = load_image_into_numpy_array(image)

            # Considering the default dpi of matplotlib, calculating the figure size to save
            y0, x0, c = image_np.shape
            h = y0 / 72  # the default for dpi for matplotlib is 72
            w = x0 / 72  # the default for dpi for matplotlib is 72
            IMAGE_SIZE = (w, h)

            # Expand dimensions since the model expects images to have shape: [1, None, None, 3]
            image_np_expanded = np.expand_dims(image_np, axis=0)
            # Actual detection.
            output_dict = run_inference_for_single_image(
                image_np, detection_graph)
            # Visualization of the results of a detection.
            vis_util.visualize_boxes_and_labels_on_image_array(
                image_np,
                output_dict['detection_boxes'],
                output_dict['detection_classes'],
                output_dict['detection_scores'],
                category_index,
                instance_masks=output_dict.get('detection_masks'),
                use_normalized_coordinates=True,
                line_thickness=8,
                min_score_thresh=min_threshold,
                skip_labels=True,
                skip_scores=True,
                agnostic_mode=True)

            # make a figure without the frame
            fig = plt.figure(frameon=False, figsize=IMAGE_SIZE)
            # make the content fill the whole figure
            ax = plt.Axes(fig, [0., 0., 1., 1.])
            ax.set_axis_off()
            fig.add_axes(ax)
            # draw your image
            ax.imshow(image_np)
            plt.savefig(
                os.path.join(bb_outpath, '{0}'.format(
                    image_path[-14:])))  # saving image with boxes on the disk
            plt.gcf().clear()
            bb_hashmap[image_path[-14:]] = {
                'detection_boxes':
                output_dict['detection_boxes'][0:sum(
                    output_dict['detection_scores'] >= min_threshold)],
                'detection_scores':
                output_dict['detection_scores'][0:sum(
                    output_dict['detection_scores'] >= min_threshold)]
            }

    with open(PATH_TO_BB_HASHMAP, 'w') as f:
        for key in bb_hashmap.keys():
            f.write("%s,%s\n" % (key, bb_hashmap[key]))

    return bb_hashmap
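A hypothetical invocation of the function above (every path and value here is a placeholder, not from the original source):

bb_hashmap = run_inference_graph_images(
    PATH_TO_FROZEN_GRAPH='frozen_inference_graph.pb',
    PATH_TO_LABELS='label_map_focus.pbtxt',
    TEST_IMAGE_PATHS=['images/IMG_00000001.JPG'],
    min_threshold=0.5,
    bb_outpath='bb_images/',
    PATH_TO_BB_HASHMAP='bb_hashmap.csv',
    NUM_CLASSES=1)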
Example #15
PATH_TO_LABELS = os.path.join(CWD_PATH, 'saved_inference_graph_models',
                              'labelmap.pbtxt')

# Path to image
PATH_TO_IMAGE = os.path.join(CWD_PATH, IMAGE_NAME)

# Number of classes the object detector can identify
NUM_CLASSES = 1

# Load the label map.
# Label maps map indices to category names, so that when our convolutional
# network predicts `5`, we know that this corresponds to `king`.
# Here we use internal utility functions, but anything that returns a
# dictionary mapping integers to appropriate string labels would be fine
label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
categories = label_map_util.convert_label_map_to_categories(
    label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
category_index = label_map_util.create_category_index(categories)
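# As noted above, anything with this shape works: a hand-built index
# (hypothetical two-class example) is just a dict keyed by class id, e.g.
# category_index = {1: {'id': 1, 'name': 'king'}, 2: {'id': 2, 'name': 'queen'}}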

# Load the Tensorflow model into memory.
detection_graph = tf.Graph()
with detection_graph.as_default():
    od_graph_def = tf.GraphDef()
    with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
        serialized_graph = fid.read()
        od_graph_def.ParseFromString(serialized_graph)
        tf.import_graph_def(od_graph_def, name='')

    sess = tf.Session(graph=detection_graph)

# Define input and output tensors (i.e. data) for the object detection classifier
Example #16
# create a context manager that makes this model the default one for
# execution
with model.as_default():
    # initialize the graph definition
    graphDef = tf.GraphDef()

    # load the graph from disk
    with tf.gfile.GFile(args["model"], "rb") as f:
        serializedGraph = f.read()
        graphDef.ParseFromString(serializedGraph)
        tf.import_graph_def(graphDef, name="")

# load the class labels from disk
labelMap = label_map_util.load_labelmap(args["labels"])
categories = label_map_util.convert_label_map_to_categories(
    labelMap, max_num_classes=args["num_classes"], use_display_name=True)
categoryIdx = label_map_util.create_category_index(categories)

# create a session to perform inference
with model.as_default():
    with tf.Session(graph=model) as sess:
        # initialize the pointer to the video file and the video writer
        stream = cv2.VideoCapture(args["input"])
        writer = None

        # loop over frames from the video file stream
        while True:
            # grab the next frame
            (grabbed, image) = stream.read()

            # if the frame was not grabbed, then we have reached the
Example #17
def main():
    # current camera frame
    global frame, annotatedFrame, frameQueue, currentFps, selectedIdx, selectedClassName, objectDistance, boxes, scores, stats
    global currentMode, M_AUTOMANEUVER, M_AUTONAV, M_MANUAL

    # print(cv2.getBuildInformation())
    print("Loading model")
    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(CHKPT_PATH, 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name='')

    label_map = label_map_util.load_labelmap(LABELS_PATH)
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes=2, use_display_name=True)
    category_index = label_map_util.create_category_index(categories)

    print("Starting main python module")
    if not DEBUG_DISABLE_FLIGHT:
        flightData = Drone(updateFlightInfo)
        process = Thread(target=flight.flightMain, args=(flightData, ))
        process.start()
    ip = '0.0.0.0'
    server = ThreadedHTTPServer((ip, 9090), CamHandler)
    target = Thread(target=server.serve_forever, args=())
    i = 0

    # To flip the image, modify the flip_method parameter (0 and 2 are the most common)
    #print(gstreamer_pipeline(flip_method=0))
    cap = cv2.VideoCapture(gstreamer_pipeline(flip_method=2),
                           cv2.CAP_GSTREAMER)
    fpsSmoothing = 70
    lastUpdate = time.time()
    try:
        if cap.isOpened():
            print("CSI Camera opened")
            graph_options = tf.GraphOptions(
                optimizer_options=tf.OptimizerOptions(
                    opt_level=tf.OptimizerOptions.L1, ))
            OptConfig = tf.ConfigProto(graph_options=graph_options)
            with detection_graph.as_default():
                with tf.Session(graph=detection_graph,
                                config=OptConfig) as sess:
                    # Definite input and output Tensors for detection_graph
                    image_tensor = detection_graph.get_tensor_by_name(
                        'image_tensor:0')
                    # Each box represents a part of the image where a particular object
                    # was detected.
                    detection_boxes = detection_graph.get_tensor_by_name(
                        'detection_boxes:0')
                    # Each score represents the level of confidence for each of
                    # the objects. Score is shown on the result image, together
                    # with the class label.
                    detection_scores = detection_graph.get_tensor_by_name(
                        'detection_scores:0')
                    detection_classes = detection_graph.get_tensor_by_name(
                        'detection_classes:0')
                    num_detections = detection_graph.get_tensor_by_name(
                        'num_detections:0')
                    i = 0
                    print("TensorFlow session loaded.")
                    while mainThreadRunning:
                        ret_val, img = cap.read()
                        frame = img
                        # convert OpenCV's BGR to RGB as the model
                        # was trained on RGB images
                        color_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                        # resize image to model size of 360x270
                        color_frame = cv2.resize(color_frame, (360, 270),
                                                 interpolation=cv2.INTER_CUBIC)
                        image_np_expanded = np.expand_dims(color_frame, axis=0)
                        # Actual detection
                        (boxes, scores, classes, num) = sess.run(
                            [
                                detection_boxes, detection_scores,
                                detection_classes, num_detections
                            ],
                            feed_dict={image_tensor: image_np_expanded})

                        # Draw boxes using TF library, should be off during competition
                        if useBoxVisualization:
                            vis_util.visualize_boxes_and_labels_on_image_array(
                                frame,
                                np.squeeze(boxes),
                                np.squeeze(classes).astype(np.int32),
                                np.squeeze(scores),
                                category_index,
                                use_normalized_coordinates=True,
                                line_thickness=4,
                                min_score_thresh=MIN_CONFIDENCE)

                        # Now that we have the detected BBoxes, it's time to determine our current obstacle
                        # First, gather stats about the bounding boxes
                        # squeezing makes it so you can do access box[i] directly instead of having to
                        # access box[0][i]
                        boxes = np.squeeze(boxes)
                        classes = np.squeeze(classes)
                        scores = np.squeeze(scores)
                        stats = []
                        j = 0
                        # This is 15ft, any object farther than that is a misidentification
                        lowestDistance = 15

                        if DEBUG_DUMP_DETECTIONS:
                            print("Boxes // Classes // Scores")
                            print(boxes)
                            print(classes)
                            print(scores)
                        # Reset selections
                        selectedIdx = None
                        if len(boxes) > 0:
                            for j in range(0, len(boxes)):
                                if scores[j] >= MIN_CONFIDENCE:
                                    stats.insert(
                                        j, getBoxStats(boxes[j], classes[j]))
                                    # print("box[%d] distance is %f" % (j, stats[j]['distance']))
                                    if stats[j]['distance'] < lowestDistance:
                                        selectedIdx = j
                                        selectedClassName = classToString(
                                            classes[j])
                                        objectDistance = stats[j]['distance']
                                        lowestDistance = objectDistance
                                        #print("Selected box[%d]: distance %f class %s conf %f" % (j, objectDistance, selectedClassName, scores[j]))
                                else:
                                    # Skip calculations on this box if it does not meet
                                    # confidence threshold
                                    stats.insert(j, 0)
                        if not DEBUG_DISABLE_FLIGHT:
                            if selectedIdx is not None:
                                flightData.upData(stats[selectedIdx],
                                                  selectedClassName)
                            else:
                                flightData.upData(None, "None")

                        # add the HUD to the current image
                        annotatedFrame = applyHud()
                        # currentFrameTime = time.time()
                        #if frameQueue.full():
                        #    with frameQueue.mutex:
                        #        frameQueue.queue.clear()
                        frameQueue.put(annotatedFrame.copy())
                        if i == 0:
                            target.start()
                            print("Starting MJPEG stream")
                        i += 1
                        # FPS smoothing algorithm
                        frameTime = time.time() - lastUpdate
                        frameFps = 1 / frameTime
                        currentFps += (frameFps - currentFps) / fpsSmoothing
                        lastUpdate = time.time()

                    cap.release()
        else:
            print("FATAL: Unable to open camera")

    except KeyboardInterrupt:
        sys.exit()
Example #18
def main():
    # This main thread will run the object detection, the capture thread is loaded later

    # Some values standing for useful files
    PATH_TO_FROZEN_GRAPH = 'model/frozen_inference_graph.pb'
    PATH_TO_LABELS = 'model/labelmap.pbtxt'
    NUM_CLASSES = 1

    # Starting the ZED capture
    print("Starting the ZED")
    capture_thread = Thread(target=capture_thread_func)
    capture_thread.start()

    # Sharing variables used by threads
    global image_np_global, depth_np_global, new_data, exit_signal

    # Load a (frozen) Tensorflow model into memory.
    print("Loading model")
    detection_graph = tf.Graph()

    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()

        with tf.gfile.GFile(PATH_TO_FROZEN_GRAPH, 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name='')

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.gpu_options.per_process_gpu_memory_fraction = 0.8

    # Loading label map
    label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
    category_index = label_map_util.create_category_index(categories)

    # Detection
    with detection_graph.as_default(), tf.Session(
            config=config, graph=detection_graph) as sess:
        while not exit_signal:
            if new_data:
                lock.acquire()
                image_np = np.copy(image_np_global)
                depth_np = np.copy(depth_np_global)
                new_data = False
                lock.release()

                # Expand dimensions since the model expects images to have shape: [1, None, None, 3]
                image_np_expanded = np.expand_dims(image_np, axis=0)

                image_tensor = detection_graph.get_tensor_by_name(
                    'image_tensor:0')
                boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
                scores = detection_graph.get_tensor_by_name(
                    'detection_scores:0')
                classes = detection_graph.get_tensor_by_name(
                    'detection_classes:0')
                num_detections = detection_graph.get_tensor_by_name(
                    'num_detections:0')

                # Actual detection.
                (boxes, scores, classes, num_detections) = sess.run(
                    [boxes, scores, classes, num_detections],
                    feed_dict={image_tensor: image_np_expanded})
                num_detections_ = num_detections.astype(int)[0]

                # Visualization of the results of a detection and storing targets positions
                image_np, voi.target_list = display_objects_distances(
                    image_np, depth_np, num_detections_, np.squeeze(boxes),
                    np.squeeze(classes).astype(np.int32), np.squeeze(scores),
                    category_index)

                # Triggering robot
                zed_robot.set_ang_and_vel(voi.target_list, voi.coord[:2],
                                          voi.rotation[2] + 90)

                #read lidar
                # lidar_points = lidar.read()

                #print(depth_np_global[50][50])
                # print(lidar_points)

                # Displaying image through OpenCV
                cv2.imshow('ZED object detection',
                           cv2.resize(image_np, (width, height)))

                if cv2.waitKey(10) & 0xFF == ord('q'):
                    cv2.destroyAllWindows()
                    exit_signal = True

            else:
                sleep(0.01)

        sess.close()

    exit_signal = True
    capture_thread.join()
Example #19
 def create_category_index(path_labels_map):
     num_classes = 6
     label_map = label_map_util.load_labelmap(path_labels_map)
     categories = label_map_util.convert_label_map_to_categories(
         label_map, max_num_classes=num_classes, use_display_name=True)
     return label_map_util.create_category_index(categories)
Example #20
parser.add_argument(
    "-max-b",
    "--max-boxes",
    dest='max_boxes',
    type=int,
    default=DETECTION_CONFIG["max_boxes_to_draw"],
    help="Max number of boxes to draw at a time, default is {default}.".format(
        default=DETECTION_CONFIG["max_boxes_to_draw"]))

args = parser.parse_args()

# Load labelmap file.
label_map = label_map_util.load_labelmap(DETECTION_CONFIG["labelmap_path"])
categories = label_map_util.convert_label_map_to_categories(
    label_map,
    max_num_classes=DETECTION_CONFIG["num_classes"],
    use_display_name=True)
category_index = label_map_util.create_category_index(categories)

# Loads a frozen Tensorflow model in memory.
detection_graph = tf.Graph()
with detection_graph.as_default():
    od_graph_def = tf.GraphDef()
    with tf.gfile.GFile(FROZEN_MODEL_PATH, 'rb') as fid:
        serialized_graph = fid.read()
        od_graph_def.ParseFromString(serialized_graph)
        tf.import_graph_def(od_graph_def, name='')


def detect_items(image_path, session):
    # Read input image.
Example #21
def test(pipeline_config_path, model_dir, label_map_path, test_data_dir,
         inference_dir):
    Path(inference_dir).mkdir(parents=True, exist_ok=True)

    configs = config_util.get_configs_from_pipeline_file(pipeline_config_path)
    model_config = configs['model']
    detection_model = model_builder.build(model_config=model_config,
                                          is_training=False)
    ckpt = tf.compat.v2.train.Checkpoint(model=detection_model)
    ckpt.restore(model_dir)

    # detect_fn = get_model_detection_function(detection_model)
    label_map = label_map_util.load_labelmap(label_map_path)
    categories = label_map_util.convert_label_map_to_categories(
        label_map,
        max_num_classes=label_map_util.get_max_label_map_index(label_map),
        use_display_name=True)
    category_index = label_map_util.create_category_index(categories)

    all_image = get_all_image_files(test_data_dir)

    for image_path in all_image:
        file_name = os.path.basename(image_path)
        name, image_format = os.path.splitext(image_path)
        try:
            image_np = load_image_into_numpy_array(image_path)

            input_tensor = tf.convert_to_tensor(np.expand_dims(image_np, 0),
                                                dtype=tf.float32)
            detections, predictions_dict, shapes = detect_fn(
                detection_model, input_tensor)
            width = shapes.numpy()[1]
            height = shapes.numpy()[0]
            boxes = detections['detection_boxes'][0].numpy()
            classes = (detections['detection_classes'][0].numpy() +
                       1).astype(int)
            scores = detections['detection_scores'][0].numpy()
            display_str = f'image : {file_name}'
            for i in range(boxes.shape[0]):
                score = round(100 * scores[i])
                if score >= 25:
                    # print(boxes[i])
                    display_str = f'{display_str} / {category_index[classes[i]]["name"]}: {str(round(100 * scores[i]))}% ' \
                                  f'({str(boxes[i])})'
                    ## boxes are [ymin, xmin, ymax, xmax] in normalized coordinates

            print(display_str)

            label_id_offset = 1
            image_np_with_detections = image_np.copy()

            viz_utils.visualize_boxes_and_labels_on_image_array(
                image_np_with_detections,
                detections['detection_boxes'][0].numpy(),
                (detections['detection_classes'][0].numpy() +
                 label_id_offset).astype(int),
                detections['detection_scores'][0].numpy(),
                category_index,
                use_normalized_coordinates=True,
                max_boxes_to_draw=200,
                min_score_thresh=.25,
                agnostic_mode=False,
            )

            save_img = cv2.cvtColor(image_np_with_detections,
                                    cv2.COLOR_BGR2RGB)

            class_dir = os.path.dirname(image_path).split('/')[-1]
            Path(os.path.join(inference_dir, class_dir)).mkdir(parents=True,
                                                               exist_ok=True)
            shutil.copy(
                f'{name}-o.csv',
                os.path.join(inference_dir, class_dir,
                             f'{os.path.basename(name)}-o.csv'))
            cv2.imwrite(os.path.join(inference_dir, class_dir, file_name),
                        save_img)
        except Exception as e:
            print(f'### Exception : {file_name} - {str(e)}')
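A hypothetical call to the function above (all paths are placeholders, not from the original source):

test(pipeline_config_path='pipeline.config',
     model_dir='training/ckpt-21',
     label_map_path='label_map.pbtxt',
     test_data_dir='test_images/',
     inference_dir='inference_results/')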
Example #22
def detect_in_video(video_path):
    # VideoWriter is responsible for creating a copy of the video used for
    # the detections, but with the detection overlays. Keep in mind the
    # frame size has to be the same as the original video.
    # out = cv2.VideoWriter('../temp/' + 'WIN_20191218_11_03_57_Pro.mp4', cv2.VideoWriter_fourcc(
    #    'M', 'J', 'P', 'G'), 10, (1280, 720))

    if is_yolo:
        print('yolo!')
        configuration = tf.ConfigProto(device_count={"GPU": 0})
        sess = tf.Session(config=configuration)
        input_data = tf.placeholder(tf.float32,
                                    [1, new_size[1], new_size[0], 3],
                                    name='input_data')
        yolo_model = yolov3(num_class, anchors)
        with tf.variable_scope('yolov3'):
            pred_feature_maps = yolo_model.forward(input_data, False)
        pred_boxes, pred_confs, pred_probs = yolo_model.predict(
            pred_feature_maps)

        pred_scores = pred_confs * pred_probs

        boxes, scores, labels = gpu_nms(pred_boxes,
                                        pred_scores,
                                        num_class,
                                        max_boxes=1,
                                        score_thresh=0.2,
                                        nms_thresh=0.45)

        saver = tf.train.Saver()
        saver.restore(sess, restore_path)
    else:
        detection_graph = tf.Graph()
        with detection_graph.as_default():
            od_graph_def = tf.GraphDef()
            with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
                serialized_graph = fid.read()
                od_graph_def.ParseFromString(serialized_graph)
                tf.import_graph_def(od_graph_def, name='')
            configuration = tf.ConfigProto(device_count={"GPU": 0})
            sess = tf.Session(config=configuration, graph=detection_graph)

            # Definite input and output Tensors for detection_graph
            image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
            # Each box represents a part of the image where a particular object
            # was detected.
            detection_boxes = detection_graph.get_tensor_by_name(
                'detection_boxes:0')
            # Each score represents the level of confidence for each of the objects.
            # Score is shown on the result image, together with the class
            # label.
            detection_scores = detection_graph.get_tensor_by_name(
                'detection_scores:0')
            detection_classes = detection_graph.get_tensor_by_name(
                'detection_classes:0')
            num_detections = detection_graph.get_tensor_by_name(
                'num_detections:0')

        label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
        categories = label_map_util.convert_label_map_to_categories(
            label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
        category_index = label_map_util.create_category_index(categories)

    frame_statistics = []
    frame_id = 1
    is_skip_frame = True
    frame_skip_count = 0

    # Create a directory with the frames for the given video
    video_base_name = os.path.basename(video_path)
    video_name = os.path.splitext(video_base_name)[0]
    video_dir = join(os.path.dirname(video_path), video_name)
    images_dir = "images"
    video_images_dir = join(video_dir, images_dir)

    if not os.path.exists(video_images_dir):
        os.makedirs(video_images_dir)
    else:
        # Remove all frames from the target directory
        remove_files_in_dir(video_images_dir)

    video_images_dir_rat = join(video_images_dir, 'rat')
    video_images_dir_mouse = join(video_images_dir, 'mouse')
    os.makedirs(video_images_dir_rat, exist_ok=True)
    os.makedirs(video_images_dir_mouse, exist_ok=True)
    remove_files_in_dir(video_images_dir_rat)
    remove_files_in_dir(video_images_dir_mouse)

    # Load the video
    cap = cv2.VideoCapture(video_path)
    video_frame_cnt = int(cap.get(7))
    video_width = int(cap.get(3))
    video_height = int(cap.get(4))
    video_fps = int(cap.get(5))

    # Get the video resolution
    video_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    video_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))

    # Set the image resolution

    cur_dir = os.getcwd()
    os.chdir(video_images_dir)
    while cap.isOpened():
        # Read the frame
        ret, frame = cap.read()
        if frame is not None:
            # Recolor the frame. By default, OpenCV uses BGR color space.
            # This short blog post explains this better:
            # https://www.learnopencv.com/why-does-opencv-use-bgr-color-format/
            # color_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            if not is_skip_frame:
                if is_yolo:
                    print('yoloo!!')
                    if is_letterbox_resize:
                        img, resize_ratio, dw, dh = letterbox_resize(
                            frame, new_size[0], new_size[1])
                    else:
                        height_ori, width_ori = frame.shape[:2]
                        img = cv2.resize(frame, tuple(new_size))
                    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
                    img = np.asarray(img, np.float32)
                    img = img[np.newaxis, :] / 255.

                    start_time = time.time()
                    boxes_, scores_, labels_ = sess.run(
                        [boxes, scores, labels], feed_dict={input_data: img})
                    end_time = time.time()

                    # rescale the coordinates to the original image
                    if is_letterbox_resize:
                        boxes_[:, [0, 2]] = (boxes_[:, [0, 2]] -
                                             dw) / resize_ratio
                        boxes_[:, [1, 3]] = (boxes_[:, [1, 3]] -
                                             dh) / resize_ratio
                    else:
                        boxes_[:, [0, 2]] *= (width_ori / float(new_size[0]))
                        boxes_[:, [1, 3]] *= (height_ori / float(new_size[1]))

                    for i in range(len(boxes_)):
                        if scores_[i] == max(scores_):
                            x0, y0, x1, y1 = boxes_[i]
                            plot_one_box(frame, [x0, y0, x1, y1],
                                         label=classes_yolo[labels_[i]] +
                                         ', {:.2f}%'.format(scores_[i] * 100),
                                         color=color_table[labels_[i]])

                            rodent_confidence = scores_[i]
                            rodent_class_id = labels_[i] + 1
                            rodent_class_name = classes_yolo[labels_[i]]
                            if rodent_confidence >= .20:
                                frame_statistics.append({
                                    'frame_id':
                                    frame_id,
                                    'confidence':
                                    rodent_confidence,
                                    'rodent_class_id':
                                    rodent_class_id,
                                    'rodent_class_name':
                                    rodent_class_name,
                                })

                                # Save the frame
                                frame_name = rodent_class_name + '/image' + str(
                                    frame_id) + '.jpg'
                                cv2.imwrite(frame_name, frame)

                                # Save the xml file
                                #scores = np.squeeze(scores[0])

                                #bbox_coords = boxes[0]
                                #writer = Writer('.', video_width, video_height)
                                #writer.addObject(rodent_class_name, bbox_coords[1] * video_width,
                                #bbox_coords[0] * video_height, bbox_coords[3] * video_width,
                                #bbox_coords[2] * video_height)
                                #writer.save('image' + str(frame_id) + '.xml')

                            #else:
                            # Save the frame
                            #frame_name = 'image' + str(frame_id) + '.jpg'
                            #cv2.imwrite(frame_name, frame)

                    cv2.putText(frame,
                                '{:.2f}ms'.format(
                                    (end_time - start_time) * 1000), (40, 40),
                                0,
                                fontScale=1,
                                color=(0, 255, 0),
                                thickness=2)

                else:
                    image_np_expanded = np.expand_dims(frame, axis=0)

                    # Actual detection.
                    (boxes, scores, classes, num) = sess.run(
                        [
                            detection_boxes, detection_scores,
                            detection_classes, num_detections
                        ],
                        feed_dict={image_tensor: image_np_expanded})

                    # Visualization of the results of a detection.
                    # note: perform the detections using a higher threshold
                    vis_util.visualize_boxes_and_labels_on_image_array(
                        frame,
                        np.squeeze(boxes[0]),
                        np.squeeze(classes[0]).astype(np.int32),
                        np.squeeze(scores[0]),
                        category_index,
                        use_normalized_coordinates=True,
                        line_thickness=8,
                        max_boxes_to_draw=1,
                        min_score_thresh=.20)

                # rodent_confidence = np.squeeze(scores[0])[0]
                # rodent_class_id = np.squeeze(classes[0]).astype(np.int32)[0]
                # rodent_class_name = category_index[rodent_class_id]['name']
                # if rodent_confidence > .20:
                #     frame_statistics.append({'frame_id': frame_id,
                #                              'confidence': rodent_confidence,
                #                              'rodent_class_id': rodent_class_id,
                #                              'rodent_class_name': rodent_class_name,
                #                              })
                #
                #     # Сохранить кадр
                #     frame_name = rodent_class_name + '/image' + str(frame_id) + '.jpg'
                #     cv2.imwrite(frame_name, frame)
                #
                #     # Save the xml file
                #     scores = np.squeeze(scores[0])
                #     for i in range(min(1, np.squeeze(boxes[0]).shape[0])):
                #         if scores is None or scores[i] > .20:
                #             boxes = tuple(boxes[i].tolist())
                #
                #     bbox_coords = boxes[0]
                #     writer = Writer('.', video_width, video_height)
                #     writer.addObject(rodent_class_name, bbox_coords[1] * video_width,
                #                      bbox_coords[0] * video_height, bbox_coords[3] * video_width,
                #                      bbox_coords[2] * video_height)
                #     writer.save('image' + str(frame_id) + '.xml')
                # else:
                #     # Save the frame
                #     frame_name = 'image' + str(frame_id) + '.jpg'
                #     cv2.imwrite(frame_name, frame)

            cv2.imshow('frame', cv2.resize(frame, (800, 600)))
            output_rgb = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
            # out.write(output_rgb)

            # Skip the frame if necessary
            if is_skip_frame:
                while 1:
                    key = cv2.waitKey(1)
                    if key == 32:  # the "space" key was pressed
                        frame_skip_count += 1
                        print("You have skipped " + str(frame_skip_count) +
                              " frame(s)")
                        break
                    elif key == 113 or key == 233:  # the 'q' ('й') key was pressed
                        is_skip_frame = False
                        break

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

            frame_id += 1

    # out.release()
    os.chdir(cur_dir)
    cap.release()
    cv2.destroyAllWindows()

    statistics = {
        'frame_count': frame_id,  # Number of frames
        'frame_skip_count': frame_skip_count,  # Number of skipped frames
        'frame_rodent_count': 0,  # Number of frames with a rodent
        'frame_rat_count': 0,  # Number of frames with a rat
        'frame_mouse_count': 0,  # Number of frames with a mouse
        'sum_confidence_rat': 0,  # Sum of rat confidences over the video
        'sum_confidence_mouse': 0,  # Sum of mouse confidences over the video
        'mean_confidence_rat': 0,  # Mean rat confidence over the video
        'mean_confidence_mouse': 0  # Mean mouse confidence over the video
    }

    for frame_statistic in frame_statistics:
        if frame_statistic['rodent_class_name'] == 'rat':
            statistics['frame_rodent_count'] += 1
            statistics['frame_rat_count'] += 1
            statistics['sum_confidence_rat'] += frame_statistic['confidence']
            statistics['mean_confidence_rat'] = statistics[
                'sum_confidence_rat'] / statistics['frame_rat_count']
        elif frame_statistic['rodent_class_name'] == 'mouse':
            statistics['frame_rodent_count'] += 1
            statistics['frame_mouse_count'] += 1
            statistics['sum_confidence_mouse'] += frame_statistic['confidence']
            statistics['mean_confidence_mouse'] = statistics[
                'sum_confidence_mouse'] / statistics['frame_mouse_count']

    print('----->>> Detection results <<<-----')
    print('Number of frames: ' + str(statistics['frame_count']))
    print('Number of skipped frames: ' +
          str(statistics['frame_skip_count']))
    print('Number of frames with a rodent: ' +
          str(statistics['frame_rodent_count']))
    print('Number of frames with a rat: ' + str(statistics['frame_rat_count']))
    print('Number of frames with a mouse: ' + str(statistics['frame_mouse_count']))
    print('Mean rat confidence over the video: ' +
          str(statistics['mean_confidence_rat']))
    print('Mean mouse confidence over the video: ' +
          str(statistics['mean_confidence_mouse']))
Пример #23
0
def load_label_map():
    global category_index, PATH_TO_LABELS, NUM_CLASSES
    label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
    category_index = label_map_util.create_category_index(categories)
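
The category_index built above is just a dict keyed by integer class id (e.g. {1: {'id': 1, 'name': 'person'}, ...}), so mapping a prediction back to a label needs no further machinery. A small illustrative sketch (the helper name and the example id are assumptions, not part of the original code):

def class_id_to_name(category_index, class_id):
    # Look up the display name for a predicted class id; fall back to 'unknown'
    entry = category_index.get(int(class_id))
    return entry['name'] if entry else 'unknown'

# After load_label_map() has populated the global category_index:
# print(class_id_to_name(category_index, 1))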
Пример #24
0
    def run(self):
        time1 = time.time()
        MIN_ratio = 0.9

        # MODEL_NAME = 'ssd_mobilenet_v1_coco_2017_11_17'
        MODEL_NAME = 'faster_rcnn_inception_v2_coco_2018_01_28'
        GRAPH_FILE_NAME = 'frozen_inference_graph.pb'
        LABEL_FILE = 'data/mscoco_label_map.pbtxt'
        NUM_CLASSES = 90
        # end define

        label_map = lmu.load_labelmap(LABEL_FILE)
        categories = lmu.convert_label_map_to_categories(
            label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
        categories_index = lmu.create_category_index(categories)

        print("call label_map & categories : %0.5f" % (time.time() - time1))

        graph_file = MODEL_NAME + '/' + GRAPH_FILE_NAME

        # thread function
        def find_detection_target(categories_index, classes, scores):
            time1_1 = time.time()  # thread function start time
            print("thread started")

            objects = []  # create the result list
            for index, value in enumerate(classes[0]):
                object_dict = {}  # one dictionary per detection
                if scores[0][index] > MIN_ratio:
                    object_dict[(categories_index.get(value)).get('name').encode('utf8')] = \
                        scores[0][index]
                    objects.append(object_dict)  # append to the list
            print(objects)

            print("thread function processing time %0.5f" % (time.time() - time1_1))

        # end thread function

        detection_graph = tf.Graph()
        with detection_graph.as_default():
            od_graph_def = tf.GraphDef()
            with tf.gfile.GFile(graph_file, 'rb') as fid:
                serialized_graph = fid.read()
                od_graph_def.ParseFromString(serialized_graph)
                tf.import_graph_def(od_graph_def, name='')

            sses = tf.Session(graph=detection_graph)

        print("store in memoey time : %0.5f" % (time.time() - time1))

        image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
        detection_boxes = detection_graph.get_tensor_by_name(
            'detection_boxes:0')

        detection_scores = detection_graph.get_tensor_by_name(
            'detection_scores:0')
        detection_classes = detection_graph.get_tensor_by_name(
            'detection_classes:0')

        num_detections = detection_graph.get_tensor_by_name('num_detections:0')

        print("make tensor time : %0.5f" % (time.time() - time1))

        prevtime = 0

        # thread_1 = Process(target = find_detection_target, args = (categories_index, classes, scores))  # create thread
        print("load video time : %0.5f" % (time.time() - time1))

        while True:
            ret, frame = capture.read()
            frame_expanded = np.expand_dims(frame, axis=0)
            height, width, channel = frame.shape

            (boxes, scores, classes, nums) = sses.run(  # np.ndarray
                [
                    detection_boxes, detection_scores, detection_classes,
                    num_detections
                ],
                feed_dict={image_tensor: frame_expanded})  # end sses.run()

            # objects = []  # create the result list
            for index, value in enumerate(classes[0]):
                object_dict = {}  # one dictionary per detection
                if scores[0][index] > MIN_ratio:
                    object_dict[(categories_index.get(value)).get('name').encode('utf8')] = \
                        scores[0][index]
                    # objects.append(object_dict)  # append to the list

                    # Compute the box's pixel coordinates (cf. visualize_boxes_and_labels_on_image_array)
                    # for box, color in box_to_color_map.items():
                    #    ymin, xmin, ymax, xmax = box
                    # [index][0] [1]   [2]  [3]

                    ymin = int((boxes[0][index][0] * height))
                    xmin = int((boxes[0][index][1] * width))
                    ymax = int((boxes[0][index][2] * height))
                    xmax = int((boxes[0][index][3] * width))

                    Result = frame[ymin:ymax, xmin:xmax]
                    cv2.imwrite('car.jpg', Result)
                    print('b')
                    try:
                        result_chars = NP.number_recognition('car.jpg')
                        ui.label_6.setText(result_chars)
                        # print(NP.check())

                    except Exception:
                        print("number plate recognition failed")

            # print(objects)

            key = cv2.waitKey(1) & 0xFF

            if key == ord("q"):
                break
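
The commented-out Process line earlier in this example suggests that find_detection_target was meant to run off the main capture loop. A minimal sketch of doing so with threading.Thread, placed right after the sses.run call (an assumed variant, not the original code):

import threading

worker = threading.Thread(target=find_detection_target,
                          args=(categories_index, classes, scores))
worker.daemon = True  # do not keep the interpreter alive because of this worker
worker.start()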
def read_data_and_evaluate(input_config, eval_config):
    """Reads pre-computed object detections and groundtruth from tf_record.

    Args:
      input_config: input config proto of type
        object_detection.protos.InputReader.
      eval_config: evaluation config proto of type
        object_detection.protos.EvalConfig.

    Returns:
      Evaluated detections metrics.

    Raises:
      ValueError: if input_reader type is not supported or metric type is unknown.
    """
    if input_config.WhichOneof('input_reader') == 'tf_record_input_reader':
        input_paths = input_config.tf_record_input_reader.input_path

        label_map = label_map_util.load_labelmap(input_config.label_map_path)
        max_num_classes = max([item.id for item in label_map.item])
        categories = label_map_util.convert_label_map_to_categories(
            label_map, max_num_classes)

        object_detection_evaluators = evaluator.get_evaluators(
            eval_config, categories)
        # Support a single evaluator
        object_detection_evaluator = object_detection_evaluators[0]

        skipped_images = 0
        processed_images = 0
        for input_path in _generate_filenames(input_paths):
            tf.logging.info('Processing file: {0}'.format(input_path))

            record_iterator = tf.python_io.tf_record_iterator(path=input_path)
            data_parser = tf_example_parser.TfExampleDetectionAndGTParser()

            for string_record in record_iterator:
                tf.logging.log_every_n(tf.logging.INFO,
                                       'Processed %d images...', 1000,
                                       processed_images)
                processed_images += 1

                example = tf.train.Example()
                example.ParseFromString(string_record)
                decoded_dict = data_parser.parse(example)

                if decoded_dict:
                    object_detection_evaluator.add_single_ground_truth_image_info(
                        decoded_dict[
                            standard_fields.DetectionResultFields.key],
                        decoded_dict)
                    object_detection_evaluator.add_single_detected_image_info(
                        decoded_dict[
                            standard_fields.DetectionResultFields.key],
                        decoded_dict)
                else:
                    skipped_images += 1
                    tf.logging.info(
                        'Skipped images: {0}'.format(skipped_images))

        return object_detection_evaluator.evaluate()

    raise ValueError('Unsupported input_reader_config.')
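
read_data_and_evaluate only needs the two config protos. A hedged sketch of building them from text and invoking the function (the paths and the metrics_set value are placeholders):

from google.protobuf import text_format
from object_detection.protos import eval_pb2, input_reader_pb2

input_config = text_format.Parse("""
    label_map_path: 'data/label_map.pbtxt'
    tf_record_input_reader { input_path: 'detections_and_groundtruth.tfrecord' }
""", input_reader_pb2.InputReader())

eval_config = text_format.Parse(
    "metrics_set: 'coco_detection_metrics'", eval_pb2.EvalConfig())

metrics = read_data_and_evaluate(input_config, eval_config)
print(metrics)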
Пример #26
0
BASE_DIR = os.path.dirname(os.path.dirname(__file__))

# Path to frozen detection graph. This is the actual model that is used for the object detection.
MODEL_NAME = 'ssd_mobilenet_v1_coco_11_06_2017'
LABEL_MAP_FILE = 'mscoco_label_map.pbtxt'
PATH_TO_CKPT = os.path.join(BASE_DIR, 'object_detection', MODEL_NAME,
                            'frozen_inference_graph.pb')
# List of the strings that is used to add correct label for each box.
PATH_TO_LABELS = os.path.join(BASE_DIR, 'object_detection', 'data',
                              LABEL_MAP_FILE)

# Loading label map
LABEL_MAP = label_map_util.load_labelmap(PATH_TO_LABELS)
# max_num_classes=90 covers the full COCO label set, which this mobilenet model can handle
CATEGORIES = label_map_util.convert_label_map_to_categories(
    LABEL_MAP, max_num_classes=90, use_display_name=True)
CATEGORY_INDEX = label_map_util.create_category_index(CATEGORIES)

LABEL_KEYS = 'category instance confidence'.split()
COLOR_KEYS = 'black white red orange yellow green cyan blue purple pink'.split(
)
BB_KEYS = 'x y z width height depth'.split()
OBJECT_VECTOR_KEYS = LABEL_KEYS + BB_KEYS + COLOR_KEYS


class ObjectSeries(pd.Series):
    LABEL_KEYS = LABEL_KEYS
    COLOR_KEYS = COLOR_KEYS
    BB_KEYS = BB_KEYS
    OBJECT_VECTOR_KEYS = OBJECT_VECTOR_KEYS
    def __init__(self, graph_path, label_path, num_classes):
        import _init_paths
        detection_graph = tf.Graph()
        with detection_graph.as_default():
            od_graph_def = tf.GraphDef()
            with tf.gfile.GFile(graph_path, 'rb') as fid:
                serialized_graph = fid.read()
                od_graph_def.ParseFromString(serialized_graph)
                tf.import_graph_def(od_graph_def, name='')

        with detection_graph.as_default():
            config = tf.ConfigProto()
            config.gpu_options.allow_growth = True
            self.sess = tf.Session(config=config)
            # Predefine image size as required by SSD
            self.image_shape = [365, 640, 3]
            # Predefine confidence threshold
            self.thresh = 0.3
            ops = tf.get_default_graph().get_operations()
            all_tensor_names = {
                output.name
                for op in ops for output in op.outputs
            }
            tensor_dict = {}
            for key in [
                    'num_detections', 'detection_boxes', 'detection_scores',
                    'detection_classes', 'detection_masks'
            ]:
                tensor_name = key + ':0'
                if tensor_name in all_tensor_names:
                    tensor_dict[key] = tf.get_default_graph(
                    ).get_tensor_by_name(tensor_name)
            if 'detection_masks' in tensor_dict:
                # The following processing is only for single image
                detection_boxes = tf.squeeze(tensor_dict['detection_boxes'],
                                             [0])
                detection_masks = tf.squeeze(tensor_dict['detection_masks'],
                                             [0])
                # Reframe is required to translate mask from box coordinates to image coordinates and fit the image size.
                real_num_detection = tf.cast(tensor_dict['num_detections'][0],
                                             tf.int32)
                detection_boxes = tf.slice(detection_boxes, [0, 0],
                                           [real_num_detection, -1])
                detection_masks = tf.slice(detection_masks, [0, 0, 0],
                                           [real_num_detection, -1, -1])
                detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(
                    detection_masks, detection_boxes, self.image_shape[0],
                    self.image_shape[1])
                detection_masks_reframed = tf.cast(
                    tf.greater(detection_masks_reframed, 0.5), tf.uint8)
                # Follow the convention by adding back the batch dimension
                tensor_dict['detection_masks'] = tf.expand_dims(
                    detection_masks_reframed, 0)
            image_tensor = tf.get_default_graph().get_tensor_by_name(
                'image_tensor:0')
            self.image_tensor = image_tensor
            self.tensor_dict = tensor_dict

            label_map = label_map_util.load_labelmap(label_path)
            categories = label_map_util.convert_label_map_to_categories(
                label_map, max_num_classes=num_classes, use_display_name=True)
            self.category_index = label_map_util.create_category_index(
                categories)
Пример #28
0
    def _load_label_map(self):
        label_map = label_map_util.load_labelmap(self.PATH_TO_LABELS)
        categories = label_map_util.convert_label_map_to_categories(
            label_map, max_num_classes=self.NUM_CLASSES, use_display_name=True)
        category_index = label_map_util.create_category_index(categories)
        return category_index
Пример #29
0
def setup_platform(hass, config, add_entities, discovery_info=None):
    """Set up the TensorFlow image processing platform."""
    model_config = config.get(CONF_MODEL)
    model_dir = model_config.get(CONF_MODEL_DIR) \
        or hass.config.path('tensorflow')
    labels = model_config.get(CONF_LABELS) \
        or hass.config.path('tensorflow', 'object_detection',
                            'data', 'mscoco_label_map.pbtxt')

    # Make sure locations exist
    if not os.path.isdir(model_dir) or not os.path.exists(labels):
        _LOGGER.error("Unable to locate tensorflow models or label map")
        return

    # append custom model path to sys.path
    sys.path.append(model_dir)

    try:
        # Verify that the TensorFlow Object Detection API is pre-installed
        # pylint: disable=unused-import,unused-variable
        os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
        import tensorflow as tf # noqa
        from object_detection.utils import label_map_util # noqa
    except ImportError:
        # pylint: disable=line-too-long
        _LOGGER.error(
            "No TensorFlow Object Detection library found! Install or compile "
            "for your system following instructions here: "
            "https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/installation.md") # noqa
        return

    try:
        # Display warning that PIL will be used if no OpenCV is found.
        # pylint: disable=unused-import,unused-variable
        import cv2 # noqa
    except ImportError:
        _LOGGER.warning(
            "No OpenCV library found. TensorFlow will process image with "
            "PIL at reduced resolution")

    # Set up Tensorflow graph, session, and label map to pass to processor
    # pylint: disable=no-member
    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(model_config.get(CONF_GRAPH), 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name='')

    session = tf.Session(graph=detection_graph)
    label_map = label_map_util.load_labelmap(labels)
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes=90, use_display_name=True)
    category_index = label_map_util.create_category_index(categories)

    entities = []

    for camera in config[CONF_SOURCE]:
        entities.append(TensorFlowImageProcessor(
            hass, camera[CONF_ENTITY_ID], camera.get(CONF_NAME),
            session, detection_graph, category_index, config))

    add_entities(entities)
Пример #30
0
def main():

    parser = argparse.ArgumentParser(
        description="run inference by using specified model",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('model_name', help="specify the model name")
    parser.add_argument('work_dir', help="specify the work space directory")
    parser.add_argument('--model_dir', default=None,
                        help="specify the dir storing models.")

    args = parser.parse_args()

    model_dir = args.model_dir
    if model_dir is None:
        assert os.getenv('MODEL_INPUT_DIR') is not None
        model_dir = os.path.join(os.getenv('MODEL_INPUT_DIR'),
                                 'object_detection')

    model_name = args.model_name
    model_file = model_name + '.tar.gz'
    tar_file = tarfile.open(os.path.join(model_dir, model_file))
    recorded_name = model_name
    for file in tar_file.getmembers():
        file_name = os.path.basename(file.name)
        if 'frozen_inference_graph.pb' in file_name:
            recorded_name = file.name
            tar_file.extract(file, args.work_dir)

    PATH_TO_LABELS = os.path.join('data', 'mscoco_label_map.pbtxt')
    PATH_TO_CKPT = os.path.join(args.work_dir, recorded_name)
    NUM_CLASSES = 90

    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name=model_name)

    label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
    categories = label_map_util.convert_label_map_to_categories(
                    label_map, max_num_classes=NUM_CLASSES,
                    use_display_name=True)
    category_index = label_map_util.create_category_index(categories)

    PATH_TO_TEST_IMAGES_DIR = 'test_images'
    TEST_IMAGE_PATHS = [os.path.join(PATH_TO_TEST_IMAGES_DIR,
                                     'image{}.jpg'.format(i))
                        for i in range(1, 2)]

    with detection_graph.as_default():
        with tf.Session(graph=detection_graph) as sess:

            image_tensor = detection_graph.get_tensor_by_name(
                '{}/image_tensor:0'.format(model_name))
            detection_boxes = detection_graph.get_tensor_by_name(
                '{}/detection_boxes:0'.format(model_name))
            detection_scores = detection_graph.get_tensor_by_name(
                '{}/detection_scores:0'.format(model_name))
            detection_classes = detection_graph.get_tensor_by_name(
                '{}/detection_classes:0'.format(model_name))
            num_detections = detection_graph.get_tensor_by_name(
                '{}/num_detections:0'.format(model_name))

            for image_path in TEST_IMAGE_PATHS:
                image = Image.open(image_path)
                image_np = load_image_into_numpy_array(image)
                image_np_expanded = np.expand_dims(image_np, axis=0)

                options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
                run_metadata = tf.RunMetadata()

                results = sess.run([detection_boxes, detection_scores,
                                    detection_classes, num_detections],
                                   feed_dict={image_tensor: image_np_expanded},
                                   options=options, run_metadata=run_metadata)
                cg = CompGraph(model_name, run_metadata, detection_graph)

                cg_tensor_dict = cg.get_tensors()
                cg_sorted_keys = sorted(cg_tensor_dict.keys())
                #cg_sorted_shape = []
                #for cg_key in cg_sorted_keys:
                #    print(cg_key)
                #    t = tf.shape(cg_tensor_dict[cg_key])
                #    cg_sorted_shape.append(t.eval(feed_dict={image_tensor: image_np_expanded},
                #                                  session=sess))

                cg_sorted_items = []
                for cg_key in cg_sorted_keys:
                    cg_sorted_items.append(tf.shape(cg_tensor_dict[cg_key]))

                cg_sorted_shape = sess.run(cg_sorted_items,
                                            feed_dict={image_tensor: image_np_expanded})
                cg.op_analysis(dict(zip(cg_sorted_keys, cg_sorted_shape)),
                               '{}.pickle'.format(model_name))

                print('Image: {}, number of detections: {}'.format(
                    image_path, int(results[3][0])))
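
Because the run above already collects RunMetadata with a full trace, the same step stats can also be written out as a Chrome trace for inspection in chrome://tracing. An optional sketch (the output file name is arbitrary):

from tensorflow.python.client import timeline

tl = timeline.Timeline(run_metadata.step_stats)
with open('{}_timeline.json'.format(model_name), 'w') as trace_file:
    trace_file.write(tl.generate_chrome_trace_format())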
Пример #31
0
def setup_platform(hass, config, add_entities, discovery_info=None):
    """Set up the TensorFlow image processing platform."""
    model_config = config.get(CONF_MODEL)
    model_dir = model_config.get(CONF_MODEL_DIR) \
        or hass.config.path('tensorflow')
    labels = model_config.get(CONF_LABELS) \
        or hass.config.path('tensorflow', 'object_detection',
                            'data', 'mscoco_label_map.pbtxt')

    # Make sure locations exist
    if not os.path.isdir(model_dir) or not os.path.exists(labels):
        _LOGGER.error("Unable to locate tensorflow models or label map")
        return

    # append custom model path to sys.path
    sys.path.append(model_dir)

    try:
        # Verify that the TensorFlow Object Detection API is pre-installed
        # pylint: disable=unused-import,unused-variable
        os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
        import tensorflow as tf # noqa
        from object_detection.utils import label_map_util # noqa
    except ImportError:
        # pylint: disable=line-too-long
        _LOGGER.error(
            "No TensorFlow Object Detection library found! Install or compile "
            "for your system following instructions here: "
            "https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/installation.md") # noqa
        return

    try:
        # Display warning that PIL will be used if no OpenCV is found.
        # pylint: disable=unused-import,unused-variable
        import cv2 # noqa
    except ImportError:
        _LOGGER.warning(
            "No OpenCV library found. TensorFlow will process image with "
            "PIL at reduced resolution")

    # setup tensorflow graph, session, and label map to pass to processor
    # pylint: disable=no-member
    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(model_config.get(CONF_GRAPH), 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name='')

    session = tf.Session(graph=detection_graph)
    label_map = label_map_util.load_labelmap(labels)
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes=90, use_display_name=True)
    category_index = label_map_util.create_category_index(categories)

    entities = []

    for camera in config[CONF_SOURCE]:
        entities.append(TensorFlowImageProcessor(
            hass, camera[CONF_ENTITY_ID], camera.get(CONF_NAME),
            session, detection_graph, category_index, config))

    add_entities(entities)
def pipeline(cap):

    # The default frame resolution is obtained from the capture device and is system dependent.
    # Convert the resolution values from float to integer.
    frame_width = int(cap.get(3))
    frame_height = int(cap.get(4))

    print('-------SIZES-----')
    print(frame_width, frame_height)

    # Define the codec and create VideoWriter object.The output is stored in 'output.avi' file.
    out = cv2.VideoWriter(FILE_OUTPUT,
                          cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), 10,
                          (frame_width, frame_height))

    sys.path.append("..")

    # Object detection imports
    # Here are the imports from the object detection module.
    from object_detection.utils import label_map_util
    from object_detection.utils import visualization_utils as vis_util

    # Model preparation
    MODEL_NAME = 'ssd_mobilenet_v2_quantized_300x300_coco'
    PATH_TO_CKPT = 'trained-inference-graphs/output_inference_graph_v2/frozen_inference_graph.pb'
    # PATH_TO_LABELS = os.path.join('data', '<LABEL_NAME>.pbtxt')
    PATH_TO_LABELS = 'annotations/label_map.pbtxt'
    NUM_CLASSES = 3
    TEST_IMAGE_PATHS = 'image_frames_'

    # Load a (frozen) Tensorflow model into memory.
    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name='')

    # Loading label map
    label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
    category_index = label_map_util.create_category_index(categories)

    with detection_graph.as_default():
        with tf.Session(graph=detection_graph) as sess:
            # Definite input and output Tensors for detection_graph
            image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')

            # Each box represents a part of the image where a particular object was detected.
            detection_boxes = detection_graph.get_tensor_by_name(
                'detection_boxes:0')

            # Each score represents the level of confidence for each of the objects.
            # The score is shown on the result image, together with the class label.
            detection_scores = detection_graph.get_tensor_by_name(
                'detection_scores:0')
            detection_classes = detection_graph.get_tensor_by_name(
                'detection_classes:0')
            num_detections = detection_graph.get_tensor_by_name(
                'num_detections:0')

            print('------------PRE----------')
            while cap.isOpened():

                # Capture frame-by-frame
                ret, frame = cap.read()

                # Expand dimensions since the model expects images to have shape: [1, None, None, 3]
                image_np_expanded = np.expand_dims(frame, axis=0)

                # Actual detection.
                start = time()
                #print('START TIME', start)
                (boxes, scores, classes,
                 num) = sess.run([
                     detection_boxes, detection_scores, detection_classes,
                     num_detections
                 ],
                                 feed_dict={image_tensor: image_np_expanded})
                end = time()
                #print('END TIME', end)
                inference_time = end - start
                #print('INFERENCE TIME', inference_time)

                print('------ACTUAL DETECTION-----')
                # print('image exp>', image_np_expanded)
                # print('boxes', np.squeeze(boxes))
                # print('scores', np.squeeze(scores))
                # print('classes', np.squeeze(classes))
                # print('num', np.squeeze(num))
                # Here output the category as string and score to terminal
                #print([category_index.get(i) for i in classes[0]])
                # print(scores)

                # Visualization of the results of a detection.
                vis_util.visualize_boxes_and_labels_on_image_array(
                    frame,
                    np.squeeze(boxes),
                    np.squeeze(classes).astype(np.int32),
                    np.squeeze(scores),
                    category_index,
                    use_normalized_coordinates=True,
                    line_thickness=8)

                objects = []
                threshold = 0.3  # lowering this value reports more (lower-confidence) detections; at 0.01 nearly every prediction passes
                for index, value in enumerate(classes[0]):
                    object_dict = {}
                    if scores[0, index] > threshold:
                        # object_dict[(category_index.get(value)).get('name').encode('utf8')] = \
                        #             scores[0, index]
                        object_dict['start'] = start
                        object_dict['end'] = end
                        object_dict['prediction'] = (
                            category_index.get(value)).get('name')
                        object_dict['probability'] = scores[0, index]
                        object_dict['inference_time'] = inference_time

                        # print('NAME1>>>', (category_index.get(value)))
                        # print('NAME2>>>', (category_index.get(value)).get('name'))
                        objects.append(object_dict)
                print('Objects>>', objects)

                #print('OKAY???', len(np.where(scores[0] > threshold)[0])/num_detections[0])

                # if ret == True:
                #     # Saves for video
                #     #out.write(frame)

                #     # Display the resulting frame
                #     #cv2.imshow('Charving Detection', frame)

                #     #Close window when "Q" button pressed
                #     if cv2.waitKey(1) & 0xFF == ord('q'):
                #         break
                # else:
                #     break

                # end = time()
                # print('end>>', end)
        # When everything is done, release the video capture and video writer objects
        cap.release()
        # out.release()

        # Closes all the frames
        cv2.destroyAllWindows()
def main():
    print("Creating eval directory")
    os.makedirs(OUT_PATH_EVAL_IMAGES, exist_ok=True)

    # load frozen graph in memory
    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name='')

    # load label map
    label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
    categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=NUM_CLASSES,
                                                                use_display_name=True)
    category_index = label_map_util.create_category_index(categories)

    TEST_IMAGE_PATHS = []
    for im_file in os.listdir(PATH_TO_TEST_IMAGES_DIR):
        # print(im_file)
        if im_file.endswith(".jpeg") and not os.path.isfile(
                os.path.join(PATH_TO_TEST_IMAGES_DIR, im_file.replace(".jpeg", ".xml"))):
            TEST_IMAGE_PATHS.append(os.path.join(PATH_TO_TEST_IMAGES_DIR, im_file))
    # TEST_IMAGE_PATHS = [ os.path.join(PATH_TO_TEST_IMAGES_DIR, 'image{}.jpg'.format(i)) for i in range(1, 8) ]
    print(len(TEST_IMAGE_PATHS))
    # Size, in inches, of the output images.
    IMAGE_SIZE = (192, 128)

    subset_test = TEST_IMAGE_PATHS[:]
    print("We are going to run the inference for {} images".format(len(subset_test)))
    shuffle(subset_test)
    for image_path in subset_test:
        out_debug_image_path = os.path.join(OUT_PATH_EVAL_IMAGES, os.path.basename(image_path))
        if os.path.isfile(out_debug_image_path):
            continue

        image = Image.open(image_path)
        # the array based representation of the image will be used later in order to prepare the
        # result image with boxes and labels on it.
        image_np = load_image_into_numpy_array(image)
        # Expand dimensions since the model expects images to have shape: [1, None, None, 3]
        image_np_expanded = np.expand_dims(image_np, axis=0)
        # Actual detection.
        output_dict = run_inference_for_single_image(image_np, detection_graph)
        # Visualization of the results of a detection.

        # draw only poles

        vis_util.visualize_boxes_and_labels_on_image_array(
            image_np,
            output_dict['detection_boxes'],
            output_dict['detection_classes'],
            output_dict['detection_scores'],
            category_index,
            instance_masks=output_dict.get('detection_masks'),
            use_normalized_coordinates=True,
            line_thickness=4,
            exclude_classes=["player"])
        plt.figure(figsize=IMAGE_SIZE)
        plt.imshow(image_np)

        #draw_court_lines_from_detections(image_np, output_dict['detection_boxes'],
        #                                 output_dict['detection_classes'],
        #                                 output_dict['detection_scores'])



        Image.fromarray(image_np).save(out_debug_image_path)
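
run_inference_for_single_image is defined outside this snippet; a condensed sketch of the usual tutorial helper (without the mask handling) that would satisfy the call above:

import numpy as np
import tensorflow as tf

def run_inference_for_single_image(image, graph):
    with graph.as_default():
        with tf.Session() as sess:
            tensor_names = ['num_detections', 'detection_boxes',
                            'detection_scores', 'detection_classes']
            tensor_dict = {name: graph.get_tensor_by_name(name + ':0')
                           for name in tensor_names}
            image_tensor = graph.get_tensor_by_name('image_tensor:0')
            output_dict = sess.run(
                tensor_dict,
                feed_dict={image_tensor: np.expand_dims(image, axis=0)})
    # Strip the batch dimension and cast to the types the visualizer expects
    output_dict['num_detections'] = int(output_dict['num_detections'][0])
    output_dict['detection_classes'] = output_dict['detection_classes'][0].astype(np.int64)
    output_dict['detection_boxes'] = output_dict['detection_boxes'][0]
    output_dict['detection_scores'] = output_dict['detection_scores'][0]
    return output_dict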
Пример #34
0
    def initialize_labels(self):
        path_to_label = os.path.join(self.model_name, 'label.pbtxt')
        label_map = label_map_util.load_labelmap(path=path_to_label)
        categories = label_map_util.convert_label_map_to_categories(
            label_map, max_num_classes=self.num_class, use_display_name=True)
        self.category_index = label_map_util.create_category_index(categories)
with detection_graph.as_default():
    # with sess.as_default():
    od_graph_def = tf.GraphDef()
    with tf.gfile.GFile(args['model'], 'rb') as fid:
        serialized_graph = fid.read()
        od_graph_def.ParseFromString(serialized_graph)
        tf.import_graph_def(od_graph_def, name='')

config = tf.ConfigProto()
config.gpu_options.per_process_gpu_memory_fraction = 0.2

sess = tf.Session(graph=detection_graph, config=config)

label_map = label_map_util.load_labelmap(args['pbtxt'])
categories = label_map_util.convert_label_map_to_categories(
    label_map,
    max_num_classes=args['number_of_classes'],
    use_display_name=True)
category_index = label_map_util.create_category_index(categories)

# Read and preprocess an image.
print("[INFO] starting video stream...")
vs = cv2.VideoCapture(0 + cv2.CAP_DSHOW)
vs.set(cv2.CAP_PROP_SETTINGS, 1)
while True:
    _, frame = vs.read()
    rows = frame.shape[0]
    cols = frame.shape[1]
    # frame = cv2.resize(frame, (300, 300))
    # frame = cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY)

    # frame = frame[...,[2,0,1]]
from object_detection.utils import visualization_utils as vis_util

CWD_PATH = os.getcwd()

# Path to frozen detection graph. This is the actual model that is used for the object detection.
MODEL_NAME = 'ssd_mobilenet_v1_coco_11_06_2017'
PATH_TO_CKPT = os.path.join(CWD_PATH, 'object_detection', MODEL_NAME, 'frozen_inference_graph.pb')

# List of the strings that is used to add correct label for each box.
PATH_TO_LABELS = os.path.join(CWD_PATH, 'object_detection', 'data', 'mscoco_label_map.pbtxt')

NUM_CLASSES = 90

# Loading label map
label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=NUM_CLASSES,
                                                            use_display_name=True)
category_index = label_map_util.create_category_index(categories)


def detect_objects(image_np, sess, detection_graph):
    # Expand dimensions since the model expects images to have shape: [1, None, None, 3]
    image_np_expanded = np.expand_dims(image_np, axis=0)
    image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')

    # Each box represents a part of the image where a particular object was detected.
    boxes = detection_graph.get_tensor_by_name('detection_boxes:0')

    # Each score represents the level of confidence for each of the objects.
    # The score is shown on the result image, together with the class label.
    scores = detection_graph.get_tensor_by_name('detection_scores:0')
    classes = detection_graph.get_tensor_by_name('detection_classes:0')
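    # NOTE: the original snippet breaks off here. The lines below are a minimal,
    # assumed completion that evaluates the tensors and returns the results.
    num_detections = detection_graph.get_tensor_by_name('num_detections:0')

    (boxes, scores, classes, num_detections) = sess.run(
        [boxes, scores, classes, num_detections],
        feed_dict={image_tensor: image_np_expanded})

    return (np.squeeze(boxes), np.squeeze(scores),
            np.squeeze(classes).astype(np.int32), int(num_detections[0]))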
def recognize_person(known_face_encodings, known_face_names):
    """
    Function for recognizing a person's body in video

    @parameter known_face_encodings: List of encoding vectors of faces from current database
    @type known_face_encodings: C{list}

    @parameter known_face_names: list of names for the known faces from the current database
    @type known_face_names: C{list}

    """

    # Initialize model for body detection
    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name='')

    label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes=NUM_CLASSES, use_display_name=True)

    category_index = label_map_util.create_category_index(categories)

    # Initialize connect with server
    credentials = pika.PlainCredentials(USER, PASSWORD)
    parameters = pika.ConnectionParameters(IP, PORT, credentials=credentials)
    connection = pika.BlockingConnection(parameters)
    channel = connection.channel()

    # Initialize parameters for logging
    last_visible = np.array([False for _ in range(0, len(known_face_names))],
                            dtype=np.bool)
    last_visible_time = [
        datetime.datetime.min for _ in range(0, len(known_face_names))
    ]

    last_no_face = False
    last_no_face_time = datetime.datetime.min

    last_unknown = False
    last_unknown_time = datetime.datetime.min

    last_update_face_base = datetime.datetime(1, 1, 1, 0, 0, 0)
    update_time = time.time() + TIMEOUT_UPDATE

    process_this_frame = True

    # Get video stream and processed frame
    camera = cv2.VideoCapture(CAMERA_ID)

    with detection_graph.as_default():
        with tf.Session(graph=detection_graph) as sess:
            while True:
                # Check for timeout for updating database
                if time.time() > update_time:
                    update_time = time.time() + TIMEOUT_UPDATE
                    if (datetime.datetime.now() -
                            last_update_face_base).days >= TIME_TO_UPDATE:
                        known_face_encodings, known_face_names = read_known_faces(
                        )
                        last_update_face_base = datetime.datetime.now()

                # Get picture from stream
                ret, frame = camera.read()
                small_frame = cv2.resize(frame, (0, 0),
                                         fx=1 / DECREASING_LEVEL,
                                         fy=1 / DECREASING_LEVEL)
                rgb_small_frame = small_frame[:, :, ::-1]

                if process_this_frame:
                    # Get detected objects (bodies and faces)
                    image_np_expanded = np.expand_dims(frame, axis=0)
                    image_tensor = detection_graph.get_tensor_by_name(
                        'image_tensor:0')
                    boxes = detection_graph.get_tensor_by_name(
                        'detection_boxes:0')
                    scores = detection_graph.get_tensor_by_name(
                        'detection_scores:0')
                    classes = detection_graph.get_tensor_by_name(
                        'detection_classes:0')
                    num_detections = detection_graph.get_tensor_by_name(
                        'num_detections:0')
                    (boxes, scores, classes, num_detections) = sess.run(
                        [boxes, scores, classes, num_detections],
                        feed_dict={image_tensor: image_np_expanded})

                    n_body = 0
                    for i in range(0, scores.shape[1]):
                        if scores[0][i] > 0.5:
                            n_body += 1
                        else:
                            break

                    # Get coordinates of box around faces
                    face_locations = face_recognition.face_locations(
                        rgb_small_frame)

                    now_no_face = False

                    # Check number of detected faces and bodies
                    n_faces = len(face_locations)
                    if n_body > n_faces:
                        # Send an alarm if anybody tries to hide their face
                        now_no_face = True
                        now = datetime.datetime.now()
                        if not last_no_face:
                            last_no_face_time = now
                        else:
                            if last_no_face_time != datetime.datetime.min:
                                delta = now - last_no_face_time
                                if delta.seconds > TIMEOUT:
                                    with open("logging.txt", "a+") as log_file:
                                        user_id = None
                                        send_data = {
                                            "userId": user_id,
                                            "cameraId": str(CAMERA_ID)
                                        }
                                        json_send_data = json.dumps(send_data)

                                        channel.basic_publish(
                                            exchange='',
                                            routing_key='users',
                                            body=json_send_data)

                                        log_file.write(
                                            "\nALARM NO FACE at " +
                                            now.strftime("%H:%M:%S %d-%m-%Y"))
                                        last_no_face_time = datetime.datetime.min

                    # Get identified faces embeddings
                    face_encodings = face_recognition.face_encodings(
                        rgb_small_frame, face_locations)
                    face_names = []
                    now_visible = np.array(
                        [False for _ in range(0, len(known_face_names))],
                        dtype=np.bool)
                    now_unknown = False

                    # Find similar face from database
                    for face_encoding in face_encodings:
                        name = "Unknown"
                        matches = face_recognition.compare_faces(
                            known_face_encodings, face_encoding)

                        face_distances = face_recognition.face_distance(
                            known_face_encodings, face_encoding)
                        best_match_index = np.argmin(face_distances)
                        if matches[best_match_index]:
                            # Current face was recognized - send record about it
                            name = known_face_names[best_match_index]
                            now_visible[best_match_index] = True
                            now = datetime.datetime.now()
                            if not last_visible[best_match_index]:
                                last_visible_time[best_match_index] = now
                            else:
                                if last_visible_time[
                                        best_match_index] != datetime.datetime.min:
                                    delta = now - last_visible_time[
                                        best_match_index]
                                    if delta.seconds > TIMEOUT:
                                        with open("logging.txt",
                                                  "a+") as log_file:
                                            user_id = name.split('_')[0]
                                            send_data = {
                                                "userId": user_id,
                                                "cameraId": CAMERA_ID
                                            }
                                            json_send_data = json.dumps(
                                                send_data)

                                            channel.basic_publish(
                                                exchange='',
                                                routing_key='users',
                                                body=json_send_data)

                                            log_file.write(
                                                "\nRecognize " + name +
                                                " at " + now.strftime(
                                                    "%H:%M:%S %d-%m-%Y"))
                                            last_visible_time[
                                                best_match_index] = datetime.datetime.min
                        else:
                            # Current face was NOT recognized - send alarm about it
                            now_unknown = True
                            now = datetime.datetime.now()
                            if not last_unknown:
                                last_unknown_time = now
                            else:
                                if last_unknown_time != datetime.datetime.min:
                                    delta = now - last_unknown_time
                                    if delta.seconds > TIMEOUT:
                                        with open("logging.txt",
                                                  "a+") as log_file:
                                            user_id = None
                                            send_data = {
                                                "userId": user_id,
                                                "cameraId": CAMERA_ID
                                            }
                                            json_send_data = json.dumps(
                                                send_data)

                                            channel.basic_publish(
                                                exchange='',
                                                routing_key='users',
                                                body=json_send_data)

                                            log_file.write(
                                                "\nALARM at " + now.strftime(
                                                    "%H:%M:%S %d-%m-%Y"))
                                            last_unknown_time = datetime.datetime.min

                        face_names.append(name)

                    last_visible = copy.deepcopy(now_visible)
                    last_no_face = now_no_face
                    last_unknown = now_unknown

                process_this_frame = not process_this_frame

                # Visualize box around person
                vis_util.visualize_boxes_and_labels_on_image_array(
                    frame,
                    np.squeeze(boxes),
                    np.squeeze(classes).astype(np.int32),
                    np.squeeze(scores),
                    category_index,
                    use_normalized_coordinates=True,
                    line_thickness=8,
                    skip_labels=True,
                    skip_scores=True)

                # Visualize box around face with name
                for (face_top, face_right, face_bottom,
                     face_left), name in zip(face_locations, face_names):
                    face_coordinates = {
                        "top": face_top * DECREASING_LEVEL,
                        "right": face_right * DECREASING_LEVEL,
                        "bottom": face_bottom * DECREASING_LEVEL,
                        "left": face_left * DECREASING_LEVEL
                    }

                    if name == "Unknown":
                        color = RED_COLOR
                    else:
                        color = BLUE_COLOR

                    # Get face's coordinates
                    cv2.rectangle(
                        frame,
                        (face_coordinates["left"], face_coordinates["top"]),
                        (face_coordinates["right"],
                         face_coordinates["bottom"]), color, 2)

                    # Visualize person's name if he was recognized
                    text_coordinates = get_text_coordinates(
                        name, face_coordinates)
                    cv2.rectangle(frame, (text_coordinates["left"] - 5,
                                          face_coordinates["bottom"]),
                                  (text_coordinates["right"] + 5,
                                   text_coordinates["bottom"] + 8), color,
                                  cv2.FILLED)
                    cv2.putText(frame, name, (text_coordinates["left"],
                                              text_coordinates["bottom"] + 4),
                                TEXT_FONT, 1.0, WHITE_COLOR, 1)

                cv2.imshow('Video', frame)

                # Press 'q' to quit
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break

                process_this_frame = not process_this_frame

    connection.close()
    camera.release()
    cv2.destroyAllWindows()

    return known_face_encodings, known_face_names
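
The recognition events published above all go to the 'users' queue. A minimal sketch of a consumer on the receiving side (assumes pika >= 1.0 and reuses the same USER/PASSWORD/IP/PORT constants; the callback is hypothetical):

def consume_recognition_events():
    credentials = pika.PlainCredentials(USER, PASSWORD)
    parameters = pika.ConnectionParameters(IP, PORT, credentials=credentials)
    connection = pika.BlockingConnection(parameters)
    channel = connection.channel()
    channel.queue_declare(queue='users')  # make sure the queue exists

    def on_message(ch, method, properties, body):
        event = json.loads(body)
        print("camera {} reported user {}".format(event["cameraId"], event["userId"]))

    channel.basic_consume(queue='users', on_message_callback=on_message, auto_ack=True)
    channel.start_consuming()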
Пример #38
0
    def inference(self):
        PATH_TO_CKPT = "D:/GitHub/traffic_sign_object_detection/fine_tuned_model/ssd_1st/frozen_inference_graph.pb"
        PATH_TO_LABELS = "D:/GitHub/traffic_sign_object_detection/data/annotations/label_map.pbtxt"

        label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
        categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=self.NUM_CLASSES, use_display_name=True)
        category_index = label_map_util.create_category_index(categories)

        detection_graph = tf.Graph()
        with detection_graph.as_default():
            od_graph_def = tf.GraphDef()
            with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
                serialized_graph = fid.read()
                od_graph_def.ParseFromString(serialized_graph)
                tf.import_graph_def(od_graph_def, name='')

            sess = tf.Session(graph=detection_graph)

        image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
        detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
        detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')
        detection_classes = detection_graph.get_tensor_by_name('detection_classes:0')
        num_detections = detection_graph.get_tensor_by_name('num_detections:0')

        a_dict = {"bicycle": 1, "child":2, "const":3, "bump":2, "cross":4, "":0}
        result_list = []

        for i in range(5):
            # prepare image
            self.ret, frame = self.video.read()
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frame_expanded = np.expand_dims(frame_rgb, axis=0)
            # inference
            (boxes, scores, classes, num) = sess.run(
                [detection_boxes, detection_scores, detection_classes, num_detections],
                feed_dict={image_tensor: frame_expanded})
            disp_name = vis_util.visualize_boxes_and_labels_on_image_array(
                frame,
                np.squeeze(boxes),
                np.squeeze(classes).astype(np.int32),
                np.squeeze(scores),
                category_index,
                use_normalized_coordinates=True,
                line_thickness=8,
                min_score_thresh=0.6
                )
            
            # slicing the name
            disp_name = disp_name.split(":")[0]
            # switch to number
            result = a_dict[disp_name]
            # list append
            result_list.append(result)

        num_1 = result_list.count(1)
        num_2 = result_list.count(2)
        num_3 = result_list.count(3)
        num_4 = result_list.count(4)

        return_last = 0
        if num_1 >= 3:
            return_last = 1
        elif num_2 >= 3:
            return_last = 2
        elif num_3 >= 3:
            return_last = 3
        elif num_4 >= 3:
            return_last = 4

        # return the result-integer
        return return_last
    def __init__(self):

        # ROS initialize
        rospy.init_node('ros_tensorflow_ObjectDetection')
        rospy.on_shutdown(self.shutdown)

        # Set model path and image topic
        model_path = rospy.get_param("~model_path", "")
        image_topic = rospy.get_param("~image_topic", "")

        self._cv_bridge = CvBridge()

        rospy.loginfo("finding model path...")
        '''Select the model path, model label and model name: 'MODEL_NAME', 'PATH_TO_CKPT' and 'PATH_TO_LABELS'. '''

        MODEL_NAME = '/outputing'
        PATH_TO_CKPT = model_path + MODEL_NAME + '/frozen_inference_graph.pb'

        PATH_TO_LABELS = os.path.join(model_path + '/data',
                                      'frame_label_map.pbtxt')

        # What model to download.
        #        MODEL_NAME = 'ssd_mobilenet_v1_coco_11_06_2017'
        #        MODEL_FILE = MODEL_NAME + '.tar.gz'
        #        DOWNLOAD_BASE = 'http://download.tensorflow.org/models/object_detection/'

        # Path to frozen detection graph. This is the actual model that is used for the object detection.
        #        PATH_TO_CKPT = MODEL_NAME + '/frozen_inference_graph.pb'

        # List of the strings that is used to add correct label for each box.
        #        PATH_TO_LABELS = os.path.join(model_path+'/data', 'mscoco_label_map.pbtxt')

        #        NUM_CLASSES = 1
        NUM_CLASSES = 90

        # Download Model
        #        rospy.loginfo("Downloading models...")            #send loginfo
        #        opener = urllib.request.URLopener()
        #        opener.retrieve(DOWNLOAD_BASE + MODEL_FILE, MODEL_FILE)
        #        tar_file = tarfile.open(MODEL_FILE)
        #        for file in tar_file.getmembers():
        #            file_name = os.path.basename(file.name)       #use os.path.basename for
        #            if 'frozen_inference_graph.pb' in file_name:
        #                    tar_file.extract(file, os.getcwd())   #os.getcwd()

        #Load a (frozen) Tensorflow model into memory.
        self.detection_graph = tf.Graph()

        with self.detection_graph.as_default():
            od_graph_def = tf.GraphDef()
            with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
                serialized_graph = fid.read()
                od_graph_def.ParseFromString(serialized_graph)
                tf.import_graph_def(od_graph_def, name='')

        rospy.loginfo("loading models' label ......")
        rospy.loginfo("please wait")

        # Loading label map
        label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
        categories = label_map_util.convert_label_map_to_categories(
            label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
        self.category_index = label_map_util.create_category_index(categories)

        #Initialize ROS Subscriber and Publisher
        self._sub = rospy.Subscriber(image_topic,
                                     ROSImage,
                                     self.callback,
                                     queue_size=100)
        self._pub = rospy.Publisher('object_detection', ROSImage, queue_size=1)
        rospy.loginfo("Start object dectecter ...")

        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
def object_detect(file1, file2):
    import numpy as np
    import tensorflow as tf
    import scipy.misc
    from PIL import Image

    if tf.__version__ != '1.4.0':
        raise ImportError(
            'Please upgrade your tensorflow installation to v1.4.0!')

    from object_detection.utils import label_map_util
    from object_detection.utils import visualization_utils as vis_util

    MODEL_NAME = 'training'
    PATH_TO_CKPT = MODEL_NAME + '/frozen_inference_graph.pb'
    PATH_TO_LABELS = MODEL_NAME + '/object-detection.pbtxt'
    NUM_CLASSES = 1

    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name='')

    label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
    category_index = label_map_util.create_category_index(categories)

    def load_image_into_numpy_array(image):
        (im_width, im_height) = image.size
        return np.array(image.getdata()).reshape(
            (im_height, im_width, 3)).astype(np.uint8)

    with detection_graph.as_default():
        with tf.Session(graph=detection_graph) as sess:
            image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
            detection_boxes = detection_graph.get_tensor_by_name(
                'detection_boxes:0')
            detection_scores = detection_graph.get_tensor_by_name(
                'detection_scores:0')
            detection_classes = detection_graph.get_tensor_by_name(
                'detection_classes:0')
            num_detections = detection_graph.get_tensor_by_name(
                'num_detections:0')

            image = Image.open(file1)
            image_np = load_image_into_numpy_array(image)
            image_np_expanded = np.expand_dims(image_np, axis=0)

            (boxes, scores, classes,
             num) = sess.run([
                 detection_boxes, detection_scores, detection_classes,
                 num_detections
             ],
                             feed_dict={image_tensor: image_np_expanded})

            vis_util.visualize_boxes_and_labels_on_image_array(
                image_np,
                np.squeeze(boxes),
                np.squeeze(classes).astype(np.int32),
                np.squeeze(scores),
                category_index,
                use_normalized_coordinates=True,
                line_thickness=4)

            scipy.misc.imsave(file2, image_np)
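
An illustrative call to the function above is shown below; the file names are placeholders. Note that scipy.misc.imsave has been removed from recent SciPy releases, so on newer installations the final line of the function can be replaced with imageio.imwrite(file2, image_np) (assuming the imageio package is available).

# Illustrative usage; the paths are placeholders.
object_detect('test_images/input.jpg', 'test_images/detected.jpg')
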
def main():
    print("starting program . . .")

    if not checkIfNecessaryPathsAndFilesExist():
        return
    # end if

    # this next comment line is necessary to avoid a false PyCharm warning
    # noinspection PyUnresolvedReferences
    if StrictVersion(tf.__version__) < StrictVersion('1.5.0'):
        raise ImportError(
            'Please upgrade your tensorflow installation to v1.5.* or later!')
    # end if

    # load a (frozen) TensorFlow model into memory
    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(FROZEN_INFERENCE_GRAPH_LOC, 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name='')
        # end with
    # end with

    # Loading label map
    # Label maps map indices to category names, so that when our convolutional network predicts `5`,
    # we know that this corresponds to `airplane`.  Here we use internal utility functions,
    # but anything that returns a dictionary mapping integers to appropriate string labels would be fine.
    label_map = label_map_util.load_labelmap(LABELS_LOC)
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
    category_index = label_map_util.create_category_index(categories)

    imageFilePaths = []

    for child_dir in [
            f.path for f in os.scandir(TEST_IMAGE_DIR) if f.is_dir()
    ]:
        for imageFileName in os.listdir(child_dir):
            if imageFileName.endswith(".jpg"):
                imageFilePaths.append(child_dir + "/" + imageFileName)

    with detection_graph.as_default():
        with tf.Session(graph=detection_graph) as sess:
            for image_path in imageFilePaths:

                image_np = cv2.imread(image_path)

                if image_np is None:
                    print("error reading file " + image_path)
                    continue
                # end if

                # Define input and output tensors for detection_graph
                image_tensor = detection_graph.get_tensor_by_name(
                    'image_tensor:0')
                # Each box represents a part of the image where a particular object was detected.
                detection_boxes = detection_graph.get_tensor_by_name(
                    'detection_boxes:0')
                # Each score represents the level of confidence for each of the objects.
                # The score is shown on the result image, together with the class label.
                detection_scores = detection_graph.get_tensor_by_name(
                    'detection_scores:0')
                detection_classes = detection_graph.get_tensor_by_name(
                    'detection_classes:0')
                num_detections = detection_graph.get_tensor_by_name(
                    'num_detections:0')

                # Expand dimensions since the model expects images to have shape: [1, None, None, 3]
                image_np_expanded = np.expand_dims(image_np, axis=0)
                # Actual detection.
                (boxes, scores, classes,
                 num) = sess.run([
                     detection_boxes, detection_scores, detection_classes,
                     num_detections
                 ],
                                 feed_dict={image_tensor: image_np_expanded})

                # print out what was predicted

                objects = []
                threshold = 0.2  # minimum score for a detection to be kept; lower it (e.g. 0.01) to keep nearly all predictions
                for index, value in enumerate(classes[0]):
                    object_dict = {}
                    if scores[0, index] > threshold:
                        object_dict[(category_index.get(value)).get(
                            'name').encode('utf8')] = scores[0, index]
                        objects.append(object_dict)
                # objects: [{b'mouse': 0.971244}]
                # print(objects)
                # we assume there is only one object found:
                try:
                    classification = list(objects[0].keys())[0]
                    score = round(objects[0][classification] * 100, 2)
                    classification = classification.decode("utf-8")
                except IndexError:
                    classification = "-"
                    score = "-"
                print("%s : %s : %r " % (image_path, classification, score))

                # Visualization of the results of a detection.
                vis_util.visualize_boxes_and_labels_on_image_array(
                    image_np,
                    np.squeeze(boxes),
                    np.squeeze(classes).astype(np.int32),
                    np.squeeze(scores),
                    category_index,
                    use_normalized_coordinates=True,
                    line_thickness=8)
                resized_image = cv2.resize(image_np, (0, 0), fx=0.8, fy=0.8)
                cv2.imshow("image_np", resized_image)
                cv2.waitKey()
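
main() above calls checkIfNecessaryPathsAndFilesExist(), which is not included in this excerpt. A minimal sketch is given below, assuming it only needs to verify the module-level constants used above (FROZEN_INFERENCE_GRAPH_LOC, LABELS_LOC, TEST_IMAGE_DIR); the original helper may check more than this.

def checkIfNecessaryPathsAndFilesExist():
    # Hypothetical sketch: confirm the frozen graph, the label map and the test
    # image directory exist before attempting detection.
    required_paths = [FROZEN_INFERENCE_GRAPH_LOC, LABELS_LOC, TEST_IMAGE_DIR]
    all_present = True
    for path in required_paths:
        if not os.path.exists(path):
            print("error: required path does not exist: " + path)
            all_present = False
    return all_present
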
Example #42
0
def main(_):
  assert FLAGS.train_dir, '`train_dir` is missing.'
  assert FLAGS.pipeline_config_path, '`pipeline_config_path` is missing'
  assert FLAGS.eval_dir, '`eval_dir` is missing.'

  configs = config_util.get_configs_from_pipeline_file(
      FLAGS.pipeline_config_path)
  if FLAGS.task == 0:
    tf.gfile.MakeDirs(FLAGS.train_dir)
    tf.gfile.Copy(FLAGS.pipeline_config_path,
                  os.path.join(FLAGS.train_dir, 'pipeline.config'),
                  overwrite=True)

  tf.gfile.MakeDirs(FLAGS.eval_dir)
  tf.gfile.Copy(FLAGS.pipeline_config_path,
                os.path.join(FLAGS.eval_dir, 'pipeline.config'),
                overwrite=True)

  model_config = configs['model']

  train_config = configs['train_config']
  train_input_config = configs['train_input_config']

  eval_config = configs['eval_config']
  if FLAGS.eval_training_data:
    eval_input_config = configs['train_input_config']
  else:
    eval_input_config = configs['eval_input_config']

  # Run evaluation after every FLAGS.epochs_between_evals training steps.
  # The total number of training steps is taken from num_steps in the config.
  if train_config.num_steps:
    total_num_epochs = train_config.num_steps
    train_config.num_steps = FLAGS.epochs_between_evals
    total_training_cycle = total_num_epochs // train_config.num_steps
  else:
    # TODO(mehdi): make it run indefinitely instead of capping at a large step count.
    total_num_epochs = 20000000
    train_config.num_steps = FLAGS.epochs_between_evals
    total_training_cycle = total_num_epochs // train_config.num_steps

  train_model_fn = functools.partial(model_builder.build,
                                     model_config=model_config,
                                     is_training=True)
  eval_model_fn = functools.partial(model_builder.build,
                                    model_config=model_config,
                                    is_training=False)

  def get_next(config):
    return dataset_util.make_initializable_iterator(
        dataset_builder.build(config)).get_next()

  # functions to create a tensor input dictionary for both training & evaluation
  train_input_dict_fn = functools.partial(get_next, train_input_config)
  eval_input_dict_fn = functools.partial(get_next, eval_input_config)

  # If not explicitly specified in the constructor and the TF_CONFIG
  # environment variable is present, load cluster_spec from TF_CONFIG.
  env = json.loads(os.environ.get('TF_CONFIG', '{}'))
  cluster_data = env.get('cluster', None)
  cluster = tf.train.ClusterSpec(cluster_data) if cluster_data else None
  task_data = env.get('task', {'type': 'master', 'index': 0})
  task_info = type('TaskSpec', (object,), task_data)

  # Parameters for a single worker.
  parameter_server_tasks = 0
  worker_replicas = 1
  worker_job_name = 'lonely_worker'
  task = 0
  is_chief = True
  master = ''

  if cluster_data and 'worker' in cluster_data:
    # The total number of worker replicas includes the "worker" tasks and the "master".
    worker_replicas = len(cluster_data['worker']) + 1
  if cluster_data and 'ps' in cluster_data:
    parameter_server_tasks = len(cluster_data['ps'])

  if worker_replicas > 1 and parameter_server_tasks < 1:
    raise ValueError('At least 1 ps task is needed for distributed training.')

  if worker_replicas >= 1 and parameter_server_tasks > 0:
    # Set up distributed training.
    server = tf.train.Server(tf.train.ClusterSpec(cluster), protocol='grpc',
                             job_name=task_info.type,
                             task_index=task_info.index)
    if task_info.type == 'ps':
      server.join()
      return

    worker_job_name = '%s/task:%d' % (task_info.type, task_info.index)
    task = task_info.index
    is_chief = (task_info.type == 'master')
    master = server.target

  label_map = label_map_util.load_labelmap(eval_input_config.label_map_path)
  max_num_classes = max([item.id for item in label_map.item])
  categories = label_map_util.convert_label_map_to_categories(label_map,
                                                              max_num_classes)

  if FLAGS.run_once:
    eval_config.max_evals = 1

  train_graph_rewriter_fn = eval_graph_rewriter_fn = None
  if 'graph_rewriter_config' in configs:
    train_graph_rewriter_fn = graph_rewriter_builder.build(
        configs['graph_rewriter_config'], is_training=True)
    eval_graph_rewriter_fn = graph_rewriter_builder.build(
        configs['graph_rewriter_config'], is_training=False)

  def train():
    return trainer.train(create_tensor_dict_fn=train_input_dict_fn,
                         create_model_fn=train_model_fn,
                         train_config=train_config, master=master, task=task,
                         num_clones=FLAGS.num_clones,
                         worker_replicas=worker_replicas,
                         clone_on_cpu=FLAGS.clone_on_cpu,
                         ps_tasks=parameter_server_tasks,
                         worker_job_name=worker_job_name,
                         is_chief=is_chief, train_dir=FLAGS.train_dir,
                         graph_hook_fn=train_graph_rewriter_fn)

  def evaluate():
    return evaluator.evaluate(eval_input_dict_fn, eval_model_fn, eval_config,
                              categories, FLAGS.train_dir, FLAGS.eval_dir,
                              graph_hook_fn=eval_graph_rewriter_fn)

  for cycle_index in range(total_training_cycle):
    tf.logging.info('Starting a training cycle: %d/%d',
                    cycle_index, total_training_cycle)
    train()
    tf.logging.info('Starting to evaluate.')
    eval_metrics = evaluate()
    if stopping_criteria_met(eval_metrics, FLAGS.mask_min_ap, FLAGS.box_min_ap):
      tf.logging.info('Stopping criteria met. Training stopped')
      break
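
The training loop above relies on stopping_criteria_met(), which is not defined in this excerpt. A plausible sketch follows, assuming eval_metrics is the metrics dictionary returned by evaluator.evaluate(); the metric key names are assumptions and would need to match whatever the configured evaluator actually reports.

def stopping_criteria_met(eval_metrics, mask_min_ap, box_min_ap):
  # Hypothetical sketch: stop training once both the box and mask mean APs
  # reach the thresholds passed on the command line. The key names below are
  # assumptions (COCO-style metrics); adjust them to the evaluator in use.
  box_ap = eval_metrics.get('DetectionBoxes_Precision/mAP', 0.0)
  mask_ap = eval_metrics.get('DetectionMasks_Precision/mAP', 0.0)
  return box_ap >= box_min_ap and mask_ap >= mask_min_ap
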
Example #43
0
    def load_label_map(self, label_map_path):
        label_map = label_map_util.load_labelmap(label_map_path)
        categories = label_map_util.convert_label_map_to_categories(
            label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
        category_index = label_map_util.create_category_index(categories)
        return categories, category_index
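
An illustrative call site for this helper is shown below; `detector` and the label map path are placeholders, and NUM_CLASSES is assumed to be defined in the enclosing module or class.

# Illustrative usage; the object name and path are placeholders.
categories, category_index = detector.load_label_map('data/label_map.pbtxt')
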