def main():
    """Classify live camera frames and display annotated results.

    Loops until ``cv2.waitKey`` reports a key press. Each frame is run
    through ``model.predict``; the top five predictions are drawn as a
    header and the running mean prediction time as a footer. When the
    loop ends, the final mean prediction time is printed.
    """
    # Index 0 selects the first camera; change the index to use another one.
    camera = cv2.VideoCapture(0)

    # One category name per line, looked up by prediction index.
    with open("categories.txt", "r") as categories_file:
        categories = categories_file.read().splitlines()

    # The input shape tells us what size the frames must be resized to.
    input_shape = model.get_default_input_shape()

    # History of per-frame durations and the mean derived from it.
    prediction_times = []
    mean_time_to_predict = 0.0

    while True:
        # The low byte of waitKey's result stays 0xFF while no key is
        # pressed (OpenCV returns -1 in that case); anything else stops us.
        if (cv2.waitKey(1) & 0xFF) != 0xFF:
            break

        image = get_image_from_camera(camera)

        # Per the helper's contract: resize/center-crop to the model's input
        # size and reorder channels if needed, yielding the model's input.
        input_data = helpers.prepare_image_for_model(
            image, input_shape.columns, input_shape.rows)

        # Time only the prediction call itself.
        start = time.time()
        predictions = model.predict(input_data)
        end = time.time()
        elapsed = end - start

        # Each entry is indexed as [0] = category index, [1] = score.
        top_5 = helpers.get_top_n(predictions, 5)
        labels = []
        for entry in top_5:
            labels.append(
                "({:.0%}) {}".format(entry[1], categories[entry[0]]))
        header_text = ", ".join(labels)
        helpers.draw_header(image, header_text)

        # Footer shows the mean evaluation time in milliseconds.
        mean_time_to_predict = helpers.get_mean_duration(
            prediction_times, elapsed)
        footer_text = "{:.0f}ms/frame".format(mean_time_to_predict * 1000)
        helpers.draw_footer(image, footer_text)

        cv2.imshow("ELL model", image)

    print("Mean prediction time: {:.0f}ms/frame".format(
        mean_time_to_predict * 1000))
def process_frame(frame, categories, frame_count, output_frame_path):
    """Classify one frame, annotate it, and save it as a PNG.

    Args:
        frame: Image to classify. ``None`` is reported as invalid and skipped.
        categories: Category names, indexed by prediction index.
        frame_count: Frame number used in the output file name and log lines.
        output_frame_path: Directory that receives
            ``recognized_<frame_count>.png``.

    Returns:
        The header text built from the top predictions, or ``None`` when the
        frame is invalid or ``helpers.get_top_n`` returns no predictions.
    """
    if frame is None:
        print("Not valid input frame! Skip...")
        return None

    # The input shape tells us what size the frame must be resized to.
    input_shape = model.get_default_input_shape()

    # Pre-allocate the vector that model.predict fills with scores.
    output_shape = model.get_default_output_shape()
    predictions = model.FloatVector(output_shape.Size())

    # Per the helper's contract: resize/center-crop to the model's input
    # size and reorder channels if needed, yielding the model's input data.
    input_data = helpers.prepare_image_for_model(
        frame, input_shape.columns, input_shape.rows)

    # Time only the prediction call itself.
    start = time.time()
    model.predict(input_data, predictions)
    end = time.time()

    # Each entry is indexed as [0] = category index, [1] = score.
    top_5 = helpers.get_top_n(predictions, 5)
    if not top_5:
        # The frame number was previously never substituted into this
        # message, so a literal "{}" was printed.
        print("Processed frame {}: No recognized frame!".format(frame_count))
        return None

    header_text = ", ".join(
        "({:.0%}) {}".format(element[1], categories[element[0]])
        for element in top_5)
    helpers.draw_header(frame, header_text)

    # Footer shows this frame's evaluation time (not a running mean).
    time_delta = end - start
    footer_text = "{:.0f}ms/frame".format(time_delta * 1000)
    helpers.draw_footer(frame, footer_text)

    # Save the annotated frame.
    output_file_path = os.path.join(
        output_frame_path, "recognized_{}.png".format(frame_count))
    cv2.imwrite(output_file_path, frame)

    print("Processed frame {}: header text: {}, footer text: {}".format(
        frame_count, header_text, footer_text))
    return header_text
def output_callback(self, predictions):
    """Handle predictions delivered by the model.

    Asks ``self.get_group`` for the detected group and its probability,
    prints a sound for the "Dog" or "Cat" group, then draws the header on
    ``self.image`` and shows it. The header is empty when no group is found.
    """
    group, probability = self.get_group(predictions)

    if not group:
        header_text = ""
    else:
        # A group was detected, so take action: each known group has a sound.
        sounds = {"Dog": "Woof!", "Cat": "Meow!"}
        if group in sounds:
            print(sounds[group])
        header_text = "({:.0%}) {}".format(probability, group)

    helpers.draw_header(self.image, header_text)
    cv2.imshow("Grouping (with callbacks)", self.image)
def main():
    """Watch the camera and press the shutter when a target category is seen.

    Initialises GPIO, camera and options, then loops until ``cv2.waitKey``
    reports a key press. Each frame is classified; if any of the top five
    predictions is in ``shoot_categories`` the shutter is pressed.
    """
    init_GPIO()
    init_camera()
    init_options()

    # Prediction indices that should trigger the shutter.
    shoot_categories = {504}

    with open("categories.txt", "r") as categories_file:
        categories = categories_file.read().splitlines()

    model_wrapper = model.ModelWrapper()
    input_shape = model_wrapper.GetInputShape()
    preprocessing_metadata = helpers.get_image_preprocessing_metadata(
        model_wrapper)

    while True:
        # The low byte of waitKey's result stays 0xFF while no key is pressed.
        if (cv2.waitKey(1) & 0xFF) != 0xFF:
            break

        image = get_image_from_camera(camera)

        prepared = helpers.prepare_image_for_model(
            image, input_shape.columns, input_shape.rows,
            preprocessing_metadata=preprocessing_metadata)
        input_data = model.FloatVector(prepared)
        predictions = model_wrapper.Predict(input_data)

        # Each entry is indexed as [0] = category index, [1] = score.
        top_5 = helpers.get_top_n(predictions, 5)
        labels = []
        for entry in top_5:
            labels.append(
                "({:.0%}) {}".format(entry[1], categories[entry[0]]))
        header_text = ", ".join(labels)
        helpers.draw_header(image, header_text)

        if top_5:
            print(header_text)

        # NOTE(review): the original indentation was lost; this assumes the
        # shutter is released on every frame and only the print above is
        # guarded by `if top_5` -- confirm against the original file.
        release_shutter()
        for entry in top_5:
            category_index = entry[0]
            if verbose:
                print(category_index)
            if category_index in shoot_categories:
                press_shutter()
                break

        cv2.imshow("BirdWatcher", image)
        # Reset the capture buffer before the next frame is read.
        rawCapture.truncate(0)
def main():
    """Classify live camera frames and show the top five predictions.

    Loops until ``cv2.waitKey`` reports a key press, drawing the top
    predictions as a header on each displayed frame.
    """
    # Index 0 selects the first camera.
    camera = cv2.VideoCapture(0)

    with open("categories.txt", "r") as categories_file:
        categories = categories_file.read().splitlines()

    model_wrapper = model.ModelWrapper()
    input_shape = model_wrapper.GetInputShape()
    preprocessing_metadata = helpers.get_image_preprocessing_metadata(
        model_wrapper)

    while True:
        # The low byte of waitKey's result stays 0xFF while no key is pressed.
        if (cv2.waitKey(1) & 0xFF) != 0xFF:
            break

        image = get_image_from_camera(camera)

        prepared = helpers.prepare_image_for_model(
            image, input_shape.columns, input_shape.rows,
            preprocessing_metadata=preprocessing_metadata)
        input_data = model.FloatVector(prepared)
        predictions = model_wrapper.Predict(input_data)

        # Each entry is indexed as [0] = category index, [1] = score.
        top_5 = helpers.get_top_n(predictions, 5)
        labels = []
        for entry in top_5:
            labels.append(
                "({:.0%}) {}".format(entry[1], categories[entry[0]]))
        header_text = ", ".join(labels)

        helpers.draw_header(image, header_text)
        cv2.imshow("ELL model", image)
def main():
    """Entry point: group live camera predictions into "Dog" or "Cat".

    Loops until ``cv2.waitKey`` reports a key press. For each frame the
    single top prediction (score threshold 0.05) is checked against the
    dog and cat category lists; when it matches, ``take_action`` is called
    and the group is drawn as the frame's header.
    """
    # Index 0 selects the first camera; change the index to use another one.
    camera = cv2.VideoCapture(0)

    # Category names for each group, one per line.
    with open("dogs.txt", "r") as dogs_file, \
            open("cats.txt", "r") as cats_file:
        dogs = dogs_file.read().splitlines()
        cats = cats_file.read().splitlines()

    # The wrapper is our handle for interacting with the model.
    model_wrapper = model.ModelWrapper()

    # The input shape tells us what size the frames must be resized to.
    input_shape = model_wrapper.GetInputShape()

    # Model-specific preprocessing metadata, forwarded to the image helper.
    preprocessing_metadata = helpers.get_image_preprocessing_metadata(
        model_wrapper)

    while True:
        # The low byte of waitKey's result stays 0xFF while no key is pressed.
        if (cv2.waitKey(1) & 0xFF) != 0xFF:
            break

        image = get_image_from_camera(camera)

        # Per the helper's contract: crop/resize keeping aspect ratio and
        # reorder channels if needed, returning a flat array of floats.
        prepared = helpers.prepare_image_for_model(
            image, input_shape.columns, input_shape.rows,
            preprocessing_metadata=preprocessing_metadata)

        # The model expects its input wrapped in a FloatVector.
        input_data = model.FloatVector(prepared)
        predictions = model_wrapper.Predict(input_data)

        # Only the single most confident prediction above the threshold.
        # Each entry is indexed as [0] = category index, [1] = score.
        top_n = helpers.get_top_n(predictions, 1, threshold=0.05)

        # Work out which group, if any, the top prediction belongs to.
        group = ""
        if top_n:
            top_index = top_n[0][0]
            if prediction_index_in_set(top_index, dogs):
                group = "Dog"
            elif prediction_index_in_set(top_index, cats):
                group = "Cat"

        header_text = ""
        if group:
            # A group was detected, so take action and label the frame.
            take_action(group)
            header_text = "({:.0%}) {}".format(top_n[0][1], group)

        helpers.draw_header(image, header_text)
        cv2.imshow("Grouping", image)
def main():
    """Run two models side by side on live camera frames.

    Loops until ``cv2.waitKey`` reports a key press. Every frame is
    evaluated by each model in a shuffled order; each model's top
    predictions and mean evaluation time are drawn on its own copy of the
    frame, and the copies are shown together as tiles.
    """
    # Index 0 selects the first camera; change the index to use another one.
    camera = cv2.VideoCapture(0)

    with open("categories.txt", "r") as categories_file:
        categories = categories_file.read().splitlines()

    # The models being compared.
    models = [model1, model2]

    # Per-model input shapes, used to resize frames appropriately.
    input_shapes = [m.get_default_input_shape() for m in models]

    # Per-model vectors that predict() fills with scores.
    prediction_arrays = [
        m.FloatVector(m.get_default_output_shape()) for m in models
    ]

    # Per-model duration history and the mean derived from it.
    prediction_times = [[], []]
    mean_time_to_predict = [0.0, 0.0]

    # One tile per model for the composed display.
    tiled_image = helpers.TiledImage(len(models))

    while True:
        # The low byte of waitKey's result stays 0xFF while no key is pressed.
        if (cv2.waitKey(1) & 0xFF) != 0xFF:
            break

        image = get_image_from_camera(camera)

        # Shuffle the evaluation order so neither model is always first,
        # which gives a fairer average of evaluation time.
        model_indices = np.arange(len(models))
        np.random.shuffle(model_indices)

        for model_index in model_indices:
            current_model = models[model_index]
            shape = input_shapes[model_index]
            scores = prediction_arrays[model_index]

            # Per the helper's contract: crop/resize keeping aspect ratio and
            # reorder channels if needed, returning a flat array of floats.
            input_data = helpers.prepare_image_for_model(
                image, shape.columns, shape.rows)

            # Time only the prediction call itself.
            start = time.time()
            current_model.predict(input_data, scores)
            end = time.time()
            elapsed = end - start

            # Up to five predictions scoring above the threshold.
            # Each entry is indexed as [0] = category index, [1] = score.
            top_5 = helpers.get_top_n(scores, n=5, threshold=0.10)

            pieces = []
            for entry in top_5:
                pieces.append(
                    "({:.0%}) {} ".format(entry[1], categories[entry[0]]))
            header_text = "".join(pieces)

            # Annotate a copy so the other model still sees a clean frame.
            model_frame = np.copy(image)
            helpers.draw_header(model_frame, header_text)

            # Footer shows this model's mean evaluation time in milliseconds.
            mean_time_to_predict[model_index] = helpers.get_mean_duration(
                prediction_times[model_index], elapsed)
            footer_text = "{:.0f}ms/frame".format(
                mean_time_to_predict[model_index] * 1000)
            helpers.draw_footer(model_frame, footer_text)

            # NOTE(review): the original indentation was lost; the tiles are
            # assumed to be refreshed after each model finishes -- confirm.
            tiled_image.set_image_at(model_index, model_frame)
            tiled_image.show()