def main():
    """Classify live camera frames with ``model`` and display the results.

    Opens camera index 0, reads the category names from ``categories.txt``
    (one per line), and then, for every loop iteration, grabs a frame, runs
    it through ``model.predict``, draws the top-5 predictions as a header and
    the mean prediction time as a footer, and shows the annotated frame in a
    window titled "ELL model".

    The loop keeps running while ``cv2.waitKey(1) & 0xFF`` equals ``0xFF``
    (OpenCV reports -1 when no key was pressed). Once it ends, the mean
    prediction time is printed. The camera is released and the OpenCV windows
    are destroyed on every exit path, including when an exception escapes.
    """
    # Open the video camera. To use a different camera, change the camera
    # index.
    camera = cv2.VideoCapture(0)

    try:
        # Read the category names
        with open("categories.txt", "r") as categories_file:
            categories = categories_file.read().splitlines()

        # Get the model's input shape. We will use this information later to
        # resize images appropriately.
        input_shape = model.get_default_input_shape()

        # Declare a variable to hold the prediction times
        prediction_times = []
        mean_time_to_predict = 0.0

        while (cv2.waitKey(1) & 0xFF) == 0xFF:
            # Get an image from the camera.
            image = get_image_from_camera(camera)

            # Prepare an image for processing. According to the original
            # notes for this helper it:
            # - resizes and center-crops to the required width and height
            #   while preserving aspect ratio,
            # - re-orders the channels from OpenCV's BGR to RGB if needed,
            # - returns the pixel data in the flat float form the model
            #   expects.
            input_data = helpers.prepare_image_for_model(
                image, input_shape.columns, input_shape.rows)

            # Send the image to the compiled model and get the predictions
            # with scores, measuring how long the call takes.
            start = time.time()
            predictions = model.predict(input_data)
            end = time.time()

            # Get the value of the top 5 predictions. Each element is used
            # below as (category index, score).
            top_5 = helpers.get_top_n(predictions, 5)

            # Generate header text that represents the top5 predictions
            header_text = ", ".join([
                "({:.0%}) {}".format(element[1], categories[element[0]])
                for element in top_5
            ])
            helpers.draw_header(image, header_text)

            # Generate footer text that represents the mean evaluation time.
            # presumably get_mean_duration records the new sample in
            # prediction_times and returns the running mean in seconds --
            # verify against helpers.
            mean_time_to_predict = helpers.get_mean_duration(
                prediction_times, end - start)
            footer_text = "{:.0f}ms/frame".format(mean_time_to_predict * 1000)
            helpers.draw_footer(image, footer_text)

            # Display the image
            cv2.imshow("ELL model", image)
    finally:
        # Always give the capture device back and close the preview window,
        # even if reading the categories or a prediction raised.
        camera.release()
        cv2.destroyAllWindows()

    # NOTE(review): the original indentation was lost; the summary is assumed
    # to be printed once, after the loop ends -- confirm.
    print("Mean prediction time: {:.0f}ms/frame".format(mean_time_to_predict * 1000))
def process_frame(frame, categories, frame_count, output_frame_path):
    """Classify one frame, annotate it, and save it as a PNG.

    Args:
        frame: Image to classify, or ``None`` when no frame is available.
            The header and footer are drawn onto this image.
        categories: Sequence of category names, indexed by the category
            index found in each top-5 element.
        frame_count: Number used in the output file name and in the log
            messages.
        output_frame_path: Directory in which
            ``recognized_<frame_count>.png`` is written.

    Returns:
        The header text describing the top-5 predictions, or ``None`` when
        ``frame`` is ``None`` or no prediction was returned by
        ``helpers.get_top_n``.
    """
    if frame is None:
        print("Not valid input frame! Skip...")
        return None

    # Get the model's input shape. We will use this information later to
    # resize images appropriately.
    input_shape = model.get_default_input_shape()

    # Get the model's output shape and create an array to hold the model's
    # output predictions
    output_shape = model.get_default_output_shape()
    predictions = model.FloatVector(output_shape.Size())

    # Prepare an image for processing. According to the original notes for
    # this helper it:
    # - resizes and center-crops to the required width and height while
    #   preserving aspect ratio,
    # - re-orders the channels from OpenCV's BGR to RGB if needed,
    # - returns the pixel data in the flat float form the model expects.
    input_data = helpers.prepare_image_for_model(
        frame, input_shape.columns, input_shape.rows)

    # Send the image to the compiled model and fill the predictions vector
    # with scores, measuring how long the call takes.
    start = time.time()
    model.predict(input_data, predictions)
    end = time.time()

    # Get the value of the top 5 predictions. Each element is used below as
    # (category index, score).
    top_5 = helpers.get_top_n(predictions, 5)

    if len(top_5) == 0:
        # The original message left its "{}" placeholder unformatted, so the
        # frame number never appeared in the log.
        print("Processed frame {}: No recognized frame!".format(frame_count))
        return None

    # Generate header text that represents the top5 predictions
    header_text = ", ".join(["({:.0%}) {}".format(
        element[1], categories[element[0]]) for element in top_5])
    helpers.draw_header(frame, header_text)

    # Generate footer text with the evaluation time of this single frame
    # (no averaging happens in this function).
    time_delta = end - start
    footer_text = "{:.0f}ms/frame".format(time_delta * 1000)
    helpers.draw_footer(frame, footer_text)

    # Save the processed frame
    output_file_path = os.path.join(
        output_frame_path, "recognized_{}.png".format(frame_count))
    cv2.imwrite(output_file_path, frame)

    print("Processed frame {}: header text: {}, footer text: {}".format(
        frame_count, header_text, footer_text))
    return header_text
def main():
    """Compare several models side by side on live camera frames.

    Opens camera index 0, reads the category names from ``categories.txt``
    (one per line), and then, for every loop iteration, grabs a frame and
    runs it through each entry of ``models`` in a freshly shuffled order.
    Each model gets its own copy of the frame, annotated with its top
    predictions (header) and its mean prediction time (footer), which is
    placed in that model's tile of a ``helpers.TiledImage``.

    The loop keeps running while ``cv2.waitKey(1) & 0xFF`` equals ``0xFF``
    (OpenCV reports -1 when no key was pressed). The camera is released on
    every exit path, including when an exception escapes.
    """
    # Open the video camera. To use a different camera, change the camera
    # index.
    camera = cv2.VideoCapture(0)

    try:
        # Read the category names
        with open("categories.txt", "r") as categories_file:
            categories = categories_file.read().splitlines()

        # Define the models we'll be using
        models = [model1, model2]

        # Get the models' input dimensions. We'll use this information later
        # to resize images appropriately.
        input_shapes = [model.get_default_input_shape() for model in models]

        # Create vectors to hold the models' output predictions.
        # NOTE(review): the shape object is passed straight to FloatVector
        # here; confirm the constructor accepts it rather than an element
        # count.
        prediction_arrays = [
            model.FloatVector(model.get_default_output_shape())
            for model in models
        ]

        # Per-model timing state, sized from `models` so that adding or
        # removing a model does not require touching these lines.
        prediction_times = [[] for _ in models]
        mean_time_to_predict = [0.0] * len(models)

        # Declare a tiled image used to compose our results
        tiled_image = helpers.TiledImage(len(models))

        while (cv2.waitKey(1) & 0xFF) == 0xFF:
            # Get an image from the camera. If you'd like to use a different
            # image, load the image from some other source.
            image = get_image_from_camera(camera)

            # Run through models in random order to get a fairer average of
            # evaluation time
            model_indices = np.arange(len(models))
            np.random.shuffle(model_indices)

            for model_index in model_indices:
                model = models[model_index]

                # Prepare the image to pass to the model. According to the
                # original notes, this helper:
                # - crops and resizes the image maintaining proper aspect
                #   ratio
                # - reorders the image channels if needed
                # - returns the data as a ravelled numpy array of floats so
                #   it can be handed to the model
                input_data = helpers.prepare_image_for_model(
                    image,
                    input_shapes[model_index].columns,
                    input_shapes[model_index].rows)

                # Get the predicted classes using the model's predict
                # function on the image input data. The predictions are
                # written into this model's vector; each entry is the
                # probability that the image contains the class represented
                # by that index.
                start = time.time()
                model.predict(input_data, prediction_arrays[model_index])
                end = time.time()

                # Grab the top 5 predictions and their indices (filtered by
                # the 0.10 threshold passed to the helper). Each element is
                # used below as (category index, score).
                top_5 = helpers.get_top_n(
                    prediction_arrays[model_index], n=5, threshold=0.10)

                # Draw header text that represents the top five predictions
                header_text = "".join([
                    "({:.0%}) {} ".format(element[1], categories[element[0]])
                    for element in top_5
                ])
                # Annotate a copy so every model starts from the clean frame.
                model_frame = np.copy(image)
                helpers.draw_header(model_frame, header_text)

                # Draw footer text representing the mean evaluation time.
                # presumably get_mean_duration records the new sample in the
                # per-model list and returns the running mean in seconds --
                # verify against helpers.
                mean_time_to_predict[model_index] = helpers.get_mean_duration(
                    prediction_times[model_index], end - start)
                footer_text = "{:.0f}ms/frame".format(
                    mean_time_to_predict[model_index] * 1000)
                helpers.draw_footer(model_frame, footer_text)

                # Set the image with the header and footer text as one of
                # the tiles.
                tiled_image.set_image_at(model_index, model_frame)
                # NOTE(review): the original indentation was lost; show() is
                # assumed to follow set_image_at inside the per-model loop --
                # confirm.
                tiled_image.show()
    finally:
        # Always give the capture device back, even if reading the
        # categories or a prediction raised.
        camera.release()