import cv2
from torchvision import transforms

from detecto.utils import normalize_transform


def detect_video(model, input_file, output_file, fps=30, score_filter=0.6):
    """Takes in a video and produces an output video with object detection
    run on it (i.e. displays boxes around detected objects in real-time).
    Output videos should have the .avi file extension. Note: some apps,
    such as macOS's QuickTime Player, have difficulty viewing these
    output videos. It's recommended that you download and use
    `VLC <https://www.videolan.org/vlc/index.html>`_ if this occurs.

    :param model: The trained model with which to run object detection.
    :type model: detecto.core.Model
    :param input_file: The path to the input video.
    :type input_file: str
    :param output_file: The name of the output file. Should have a .avi
        file extension.
    :type output_file: str
    :param fps: (Optional) Frames per second of the output video.
        Defaults to 30.
    :type fps: int
    :param score_filter: (Optional) Minimum score required to show a
        prediction. Defaults to 0.6.
    :type score_filter: float

    **Example**::

        >>> from detecto.core import Model
        >>> from detecto.visualize import detect_video

        >>> model = Model.load('model_weights.pth', ['tick', 'gate'])
        >>> detect_video(model, 'input_vid.mp4', 'output_vid.avi', score_filter=0.7)
    """

    # Read in the video
    video = cv2.VideoCapture(input_file)

    # Video frame dimensions
    frame_width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))

    # Scale down frames when passing into model for faster speeds
    scaled_size = 800
    scale_down_factor = min(frame_height, frame_width) / scaled_size

    # The VideoWriter with which we'll write our video with the boxes and labels
    # Parameters: filename, fourcc, fps, frame_size
    out = cv2.VideoWriter(output_file, cv2.VideoWriter_fourcc(*'DIVX'), fps,
                          (frame_width, frame_height))

    # Transform to apply on individual frames of the video
    transform_frame = transforms.Compose([  # TODO Issue #16
        transforms.ToPILImage(),
        transforms.Resize(scaled_size),
        transforms.ToTensor(),
        normalize_transform(),
    ])

    # Loop through every frame of the video
    while True:
        ret, frame = video.read()
        # Stop the loop when we're done with the video
        if not ret:
            break

        # The transformed frame is what we'll feed into our model
        # transformed_frame = transform_frame(frame)
        transformed_frame = frame  # TODO: Issue #16
        predictions = model.predict(transformed_frame)

        # Add the top prediction of each class to the frame
        for label, box, score in zip(*predictions):
            if score < score_filter:
                continue

            # Since the predictions are for scaled down frames,
            # we need to increase the box dimensions
            # box *= scale_down_factor  # TODO Issue #16

            # Create the box around each object detected
            # Parameters: frame, (start_x, start_y), (end_x, end_y), (r, g, b), thickness
            # Box coordinates are float tensors, so cast to int for OpenCV
            cv2.rectangle(frame, (int(box[0]), int(box[1])),
                          (int(box[2]), int(box[3])), (255, 0, 0), 3)

            # Write the label and score for the boxes
            # Parameters: frame, text, (start_x, start_y), font, font scale, (r, g, b), thickness
            cv2.putText(frame, '{}: {}'.format(label, round(score.item(), 2)),
                        (int(box[0]), int(box[1]) - 10), cv2.FONT_HERSHEY_SIMPLEX,
                        1, (255, 0, 0), 3)

        # Write this frame to our video file
        out.write(frame)

        # If the 'q' key is pressed, break from the loop
        key = cv2.waitKey(1) & 0xFF
        if key == ord('q'):
            break

    # When finished, release the video capture and writer objects
    video.release()
    out.release()

    # Close all the frames
    cv2.destroyAllWindows()
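# The function above hard-codes an .avi/DIVX output, which the docstring
# notes can be awkward to play back. A minimal sketch of the one-line
# change inside the function for MP4 output instead, using the mp4v fourcc
# (as a later snippet in this section does); the output filename here is a
# hypothetical example:
out = cv2.VideoWriter('output_vid.mp4', cv2.VideoWriter_fourcc(*'mp4v'),
                      fps, (frame_width, frame_height))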
from detecto import core, utils, visualize
from torchvision import transforms

# Augmentations to apply to each training image
augmentations = transforms.Compose([
    transforms.ToPILImage(),
    transforms.RandomHorizontalFlip(0.5),
    transforms.ColorJitter(saturation=0.5),
    transforms.ToTensor(),
    utils.normalize_transform(),
])

val_dataset = core.Dataset('/dataset/validation_images/')
dataset = core.Dataset('/dataset/train_images/', transform=augmentations)

model = core.Model(['rust'])
loader = core.DataLoader(dataset, batch_size=2, shuffle=True)
losses = model.fit(loader, val_dataset, epochs=10, learning_rate=0.001,
                   lr_step_size=5, verbose=True)

model.save('model/model_weights.pth')
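# The snippet above imports detecto.visualize but never uses it. A minimal
# follow-up sketch for running inference with the saved weights; the image
# path below is a hypothetical placeholder:
model = core.Model.load('model/model_weights.pth', ['rust'])
image = utils.read_image('/dataset/validation_images/example.jpg')  # hypothetical
labels, boxes, scores = model.predict_top(image)  # top prediction per class
visualize.show_labeled_image(image, boxes, labels)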
import cv2
import numpy as np
from torchvision import transforms
from detecto import utils

# Note: this function relies on several names defined elsewhere in the
# original script: `subtractor` (a background subtractor), the colours
# `red`, `blue`, and `white`, the parking-spot list `parkingspace_Coords`,
# and the overlap helpers `compare` and `comparesp`. A hedged sketch of
# these follows the function.


def processVideo(model, input_file, output_file, fps=30):
    video = cv2.VideoCapture(input_file)

    # Video frame dimensions
    frame_width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))

    # Scale down frames when passing into model for faster speeds
    scaled_size = 800
    scale_down_factor = min(frame_height, frame_width) / scaled_size

    out = cv2.VideoWriter(output_file, cv2.VideoWriter_fourcc(*'DIVX'), fps,
                          (frame_width, frame_height))

    # Transform to apply on individual frames of the video
    transform_frame = transforms.Compose([
        transforms.ToPILImage(),
        transforms.Resize(scaled_size),
        transforms.ToTensor(),
        utils.normalize_transform(),
    ])

    while True:
        carMovin = []  # New list to keep track of all the moving cars
        ret, frame = video.read()  # Read the next frame
        if not ret:  # Exit the loop once the video has no frames left
            break

        # Frame area, used to keep only contours big enough to be cars
        frameArea = frame_height * frame_width

        mask = subtractor.apply(frame)  # Background subtraction
        newmask = cv2.medianBlur(mask, 3)  # Remove salt-and-pepper noise

        # Find the contours of moving objects
        (contours, hierarchy) = cv2.findContours(newmask.copy(), cv2.RETR_EXTERNAL,
                                                 cv2.CHAIN_APPROX_SIMPLE)

        # Go through the contours and keep the ones big enough to be cars
        for c in contours:
            (x, y, w, h) = cv2.boundingRect(c)
            area = (w + 1) * (h + 1)
            if area > frameArea * 0.001:
                carMovin.append([(x, y), (x + w, y + h)])

        # Shrink the frame so the model can process it faster
        transformed_frame = transform_frame(frame)

        # The predictions come back as PyTorch tensors
        labels, boxes, scores = model.predict(transformed_frame)

        # The predictions are for the scaled-down frame, so map the boxes
        # back onto the original frame
        boxes *= scale_down_factor

        for box in boxes:  # Go through every box from the predictions
            colourcar = False  # Tracks whether this car is moving
            # Convert from a PyTorch tensor to an integer NumPy array
            box = np.array(box.tolist(), dtype=int)
            for car in carMovin:  # Go through every moving car found
                # The first compare function finds rectangles that overlap
                if compare(box, car):
                    colourcar = True  # If they overlap, colour the box
            if colourcar:
                # Colour the moving car's box red
                cv2.rectangle(frame, (box[0], box[1]), (box[2], box[3]), red, 2)

        for spot in parkingspace_Coords:  # Go through all the saved parking spots
            colourspot = False  # Tracks whether this spot has a car in it
            for box in boxes:  # Find all the cars that are parked
                box = np.array(box.tolist(), dtype=int)
                # The second compare function matches parking spots to cars
                if comparesp(box, spot):
                    colourspot = True
            if colourspot:
                # Parking spot with a car in it
                cv2.rectangle(frame, (spot[0][0], spot[0][1]),
                              (spot[2][0], spot[2][1]), blue, 2)
            else:
                # Parking spot without a car
                cv2.rectangle(frame, (spot[0][0], spot[0][1]),
                              (spot[2][0], spot[2][1]), white, 2)

        out.write(frame)  # Write this frame to the output file

        # If the 'q' key is pressed, break from the loop
        key = cv2.waitKey(1) & 0xFF
        if key == ord('q'):
            break

    # When finished, release the video capture and writer objects
    video.release()
    out.release()

    # Close all the frames
    cv2.destroyAllWindows()
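# The names used above are not defined in this snippet. A minimal sketch of
# what they could look like, assuming `compare` and `comparesp` are simple
# axis-aligned rectangle-overlap tests; all of these definitions are
# hypothetical stand-ins, not the original project's code:
subtractor = cv2.createBackgroundSubtractorMOG2()  # background subtractor

red, blue, white = (0, 0, 255), (255, 0, 0), (255, 255, 255)  # BGR colours

# Hypothetical: each parking spot stored as four (x, y) corner points
parkingspace_Coords = [[(100, 200), (180, 200), (180, 260), (100, 260)]]


def compare(box, car):
    # box is (x1, y1, x2, y2); car is [(x1, y1), (x2, y2)]
    (cx1, cy1), (cx2, cy2) = car
    return box[0] < cx2 and box[2] > cx1 and box[1] < cy2 and box[3] > cy1


def comparesp(box, spot):
    # spot is four corner points; corners 0 and 2 are opposite corners
    sx1, sy1 = spot[0]
    sx2, sy2 = spot[2]
    return box[0] < sx2 and box[2] > sx1 and box[1] < sy2 and box[3] > sy1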
# Module-level imports assumed by this method: torch, pandas as pd,
# matplotlib.pyplot as plt, random.choices, torchvision.transforms, and
# detecto's core and utils modules.


def trainMountingConfigClassifier(self, train_path, val_path,
                                  device=torch.device('cuda')):
    """
    This function uses Faster R-CNN ResNet50 FPN as the base network and
    as a transfer learning framework to train a model that performs object
    detection on the mounting configuration of solar arrays. It uses the
    training data to locate and classify the mounting configuration of the
    solar installation. It uses the validation data to prevent overfitting
    and to test predictions on the fly.

    Parameters
    ----------
    train_path: string
        This is the path to the folder that contains the training images.
        Note that the directory must be structured in this format:
            train_path/
            ...images/
            ......a_image_1.png
            ......a_image_2.png
            ...annotations/
            ......b_image_1.xml
            ......b_image_2.xml
    val_path: string
        This is the path to the folder that contains the validation images.
        Note that the directory must be structured in this format:
            val_path/
            ...images/
            ......a_image_1.png
            ......a_image_2.png
            ...annotations/
            ......b_image_1.xml
            ......b_image_2.xml
    device: torch.device
        This argument is passed to the Model() class in Detecto. It
        determines how to run the model: either on GPU via CUDA (the
        default setting) or on CPU. Please note that running the model on
        GPU results in significantly faster training times.

    Returns
    -------
    model: detecto.core.Model object
        The final trained mounting configuration object detection model.
    """
    # Convert the data set combinations (png + xml) to a CSV record.
    val_labels_path = val_path + '/annotations.csv'
    train_labels_path = train_path + '/annotations.csv'
    utils.xml_to_csv(train_path + '/annotations/', train_labels_path)
    utils.xml_to_csv(val_path + '/annotations/', val_labels_path)

    # Custom oversampling to balance out our classes
    train_data = pd.read_csv(train_labels_path)
    class_count = train_data['class'].value_counts()
    train_data_resampled = train_data.copy()
    for index, count in class_count.items():
        number_times_resample = class_count.max() - count
        # Randomly sample a class X times
        class_index_list = list(train_data[train_data['class'] == index].index)
        # Resample the list with replacement
        idx_to_duplicate = choices(class_index_list, k=number_times_resample)
        for idx in idx_to_duplicate:
            dup = train_data.loc[[idx]]
            # Add the duplicated row to the dataframe
            # (DataFrame.append was removed in pandas 2.0, so use concat)
            train_data_resampled = pd.concat([train_data_resampled, dup],
                                             ignore_index=True)
    # Reindex after all of the duplicates have been added
    train_data_resampled = train_data_resampled.reset_index(drop=True)
    # Re-write the resampled data set
    train_data_resampled.to_csv(train_labels_path, index=False)

    custom_transforms = transforms.Compose([
        transforms.ToPILImage(),
        transforms.Resize(800),
        transforms.ToTensor(),
        utils.normalize_transform()
    ])

    # Load in the training and validation data sets
    dataset = core.Dataset(train_labels_path, train_path + '/images',
                           transform=custom_transforms)
    val_dataset = core.Dataset(val_labels_path, val_path + '/images')

    # Customize training options
    loader = core.DataLoader(dataset, batch_size=self.batch_size,
                             shuffle=True)
    model = core.Model(["ground-fixed", "carport-fixed", "rooftop-fixed",
                        "ground-single_axis_tracker"], device=device)
    losses = model.fit(loader, val_dataset, epochs=self.no_of_epochs,
                       learning_rate=self.learning_rate, verbose=True)
    plt.plot(losses)
    plt.show()
    return model
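# A minimal usage sketch for the method above, assuming it lives on a trainer
# class that exposes batch_size, no_of_epochs, and learning_rate; the class
# name and data paths here are hypothetical, not from the original project:
trainer = MountingConfigTrainer()  # hypothetical class holding the method
model = trainer.trainMountingConfigClassifier(train_path='./data/train',
                                              val_path='./data/val',
                                              device=torch.device('cuda'))
model.save('mounting_config_model.pth')  # persist the trained Detecto model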
bar = progressbar.ProgressBar(current_value=current_frame,
                              max_value=frame_count).start()

# Scale down frames when passing into model for faster speeds
scaled_size = 256
scale_down_factor = min(frame_height, frame_width) / scaled_size

# The VideoWriter with which we'll write our video with the boxes and labels
# Parameters: filename, fourcc, fps, frame_size
out = cv2.VideoWriter(output_file, cv2.VideoWriter_fourcc(*'mp4v'), fps,
                      (frame_width, frame_height))

# Transform to apply on individual frames of the video
transform_frame = transforms.Compose([  # TODO Issue #16
    transforms.ToPILImage(),
    transforms.Resize(scaled_size),
    transforms.ToTensor(),
    normalize_transform(),
])

# Loop through every frame of the video
while True:
    ret, frame = video.read()
    # Stop the loop when we're done with the video
    if not ret:
        break

    # The transformed frame is what we'll feed into our model
    transformed_frame = transform_frame(frame)
    predictions = model.predict(transformed_frame)

    # Add the top prediction of each class to the frame
    for label, box, score in zip(*predictions):
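        # The original snippet is truncated at this point. What follows is a
        # hedged sketch of the rest of the loop body, modeled on detect_video
        # earlier in this section; score_filter is assumed to be defined in
        # the elided context.
        if score < score_filter:
            continue

        # Map the box from the scaled-down frame back to full resolution
        box = [int(coord * scale_down_factor) for coord in box]
        cv2.rectangle(frame, (box[0], box[1]), (box[2], box[3]), (255, 0, 0), 3)
        cv2.putText(frame, '{}: {}'.format(label, round(score.item(), 2)),
                    (box[0], box[1] - 10), cv2.FONT_HERSHEY_SIMPLEX, 1,
                    (255, 0, 0), 3)

    # Write the annotated frame and advance the progress bar
    out.write(frame)
    current_frame += 1
    bar.update(current_frame)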
import matplotlib.pyplot as plt
from torchvision import transforms

from detecto.utils import normalize_transform
from detecto.core import Dataset, DataLoader, Model

IMAGE_DIR = '/Users/noahmushkin/codes/selenium-python-scraping/data/images/cameras/'
LABEL_DIR = '/Users/noahmushkin/codes/selenium-python-scraping/data/labeled_cams_convert/'

img_transform = transforms.Compose([
    transforms.ToPILImage(),
    # Note: Resize(400) rescales every image so its smaller edge is 400 pixels
    transforms.Resize(400),
    transforms.RandomHorizontalFlip(0.5),
    transforms.ColorJitter(saturation=0.2),
    transforms.ToTensor(),  # required
    normalize_transform(),  # required
])

dataset = Dataset(LABEL_DIR, IMAGE_DIR, transform=img_transform)
labels = ['camera']
model = Model(classes=labels)
loader = DataLoader(dataset, batch_size=32, shuffle=True)
losses = model.fit(loader, epochs=10, learning_rate=0.005)

plt.plot(losses)
plt.show()

model.save('cam_model.pth')
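# A minimal follow-up sketch for reloading the saved weights and keeping only
# confident detections; the test image path and the 0.8 threshold are
# hypothetical choices, not from the original script:
from detecto.utils import read_image
from detecto.visualize import show_labeled_image

model = Model.load('cam_model.pth', ['camera'])
image = read_image('test_camera.jpg')  # hypothetical test image
labels, boxes, scores = model.predict(image)
keep = scores > 0.8  # boolean mask over all predictions
filtered_boxes = boxes[keep]
filtered_labels = [l for l, k in zip(labels, keep) if k]
show_labeled_image(image, filtered_boxes, filtered_labels)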