def _format_batch_report(image_path, seconds, labels):
    """Build the per-image ``batch_info`` message emitted after a batch is processed."""
    # os.path.basename instead of split("/") so Windows-style paths are shortened too.
    report = "{0:20s} predicted in {1:6.3f} seconds".format(os.path.basename(image_path), seconds)
    report += "\n{0:20s} {1:s}".format("Objects Detected:", " ".join(labels))
    report += "\n----------------------------------------------------------"
    return report


def detect_images(widget=None):
    """Run the detector over a list of images and write annotated copies.

    Settings are taken from ``arg_parse()`` when no widget is given, otherwise
    from ``widget.args``. Progress messages, one report per image and a final
    timing summary are sent through ``print_info``; the drawing itself is
    delegated to ``ImagesHandler``, which receives ``args.det`` as its output
    location.

    Images that ``cv2.imread`` cannot decode are reported as errors and left
    out of the run instead of crashing the preprocessing step.

    Args:
        widget: Optional object exposing ``args``; it is forwarded unchanged to
            every ``print_info`` call.

    Returns:
        None. When nothing is detected (or no image could be read) the function
        emits ``no_detections`` followed by ``finished`` and returns early.
    """
    args = widget.args if widget else arg_parse()

    # ---- Resolve image paths -------------------------------------------
    read_dir = time.time()
    print_info(widget, False, "info", "Reading addresses.....")
    im_list = []
    img = None
    try:
        for img in args.images:
            if os.path.isabs(img):
                im_list.append(img)
            else:
                im_list.append(osp.join(osp.realpath('.'), img))
    except FileNotFoundError:
        print_info(widget, True, "error", "No file or directory with the name {}".format(img))
    os.makedirs(args.det, exist_ok=True)
    print_info(widget, False, "info", "Finished reading addresses")
    finish_read_dir = time.time()

    batch_size = int(args.bs)
    confidence = float(args.confidence)
    nms_thesh = float(args.nms_thresh)
    cuda_present = torch.cuda.is_available()
    classes = load_classes(args.names)
    num_classes = len(classes)

    # ---- Set up the neural network -------------------------------------
    load_net = time.time()
    print_info(widget, False, "info", "Loading network.....")
    model = Darknet(args.cfg)
    model.load_weights(args.weights)
    print_info(widget, False, "info", "Network successfully loaded")
    finish_load_net = time.time()

    model.net_info["height"] = args.reso
    model.net_info["width"] = args.reso
    inp_dim = int(model.net_info["height"])
    assert inp_dim % 32 == 0
    assert inp_dim > 32

    # If there's a GPU available, put the model on GPU
    if cuda_present:
        model.cuda()
    # Set the model in evaluation mode (for Batchnorm layers)
    model.eval()

    # ---- Load and batch the images -------------------------------------
    load_batch = time.time()
    print_info(widget, False, "info", "Loading batches.....")
    readable_paths = []
    loaded_ims = []
    for path in im_list:
        image = cv2.imread(path)
        if image is None:
            # cv2.imread signals a missing/unreadable file by returning None.
            print_info(widget, True, "error", "No file or directory with the name {}".format(path))
            continue
        readable_paths.append(path)
        loaded_ims.append(image)
    im_list = readable_paths
    if not im_list:
        print_info(widget, False, 'no_detections', "No detections were made")
        print_info(widget, False, 'finished')
        return

    im_batches = [prep_image(image, inp_dim) for image in loaded_ims]
    # One (w, h, w, h) row per image so it lines up with the (x1, y1, x2, y2) box columns.
    im_dim_list = torch.FloatTensor([(image.shape[1], image.shape[0]) for image in loaded_ims]).repeat(1, 2)

    if batch_size != 1:
        num_batches = (len(im_list) + batch_size - 1) // batch_size
        im_batches = [torch.cat(im_batches[i * batch_size:(i + 1) * batch_size]) for i in range(num_batches)]

    if cuda_present:
        im_dim_list = im_dim_list.cuda()
    output = torch.empty((0, 0))
    print_info(widget, False, "info", "Finished loading batches....")

    # ---- Detection loop ------------------------------------------------
    start_det_loop = time.time()
    for i, batch in enumerate(im_batches):
        start = time.time()
        print_info(widget, False, "info", f"Detecting batch no {i}....")
        if cuda_present:
            batch = batch.cuda()
        with torch.no_grad():
            prediction = model(batch, cuda_present)
        prediction = write_results(prediction, confidence, num_classes, nms_conf=nms_thesh)
        seconds_per_image = (time.time() - start) / batch_size

        first_id = i * batch_size
        batch_paths = im_list[first_id:first_id + batch_size]

        # An int result is treated as "nothing detected in this batch".
        if isinstance(prediction, int):
            for im_id, image in enumerate(batch_paths, start=first_id):
                print_info(widget, False, 'batch_info', _format_batch_report(image, seconds_per_image, []), im_id)
            continue

        # transform the attribute from index in batch to index in im_list
        prediction[:, 0] += first_id
        if np.size(output, 0) == 0:  # output has not been initialised yet
            output = prediction
        else:
            output = torch.cat((output, prediction))

        for im_id, image in enumerate(batch_paths, start=first_id):
            # Only this batch's rows can carry these image ids, so there is no
            # need to scan everything accumulated so far.
            labels = [classes[int(row[-1])] for row in prediction if int(row[0]) == im_id]
            print_info(widget, False, 'batch_info', _format_batch_report(image, seconds_per_image, labels), im_id)

        if cuda_present:
            torch.cuda.synchronize()
        print_info(widget, False, "info", f"Finished detecting batch no {i}")

    if np.size(output, 0) == 0:
        print_info(widget, False, 'no_detections', "No detections were made")
        print_info(widget, False, 'finished')
        return

    # ---- Map boxes from network input space back to image space --------
    print_info(widget, False, "info", "Output processing....")
    output_rescale = time.time()
    im_dim_list = torch.index_select(im_dim_list, 0, output[:, 0].long())
    scaling_factor = torch.min(inp_dim / im_dim_list, 1)[0].view(-1, 1)
    # Remove the letterbox padding, then undo the resize.
    output[:, [1, 3]] -= (inp_dim - scaling_factor * im_dim_list[:, 0].view(-1, 1)) / 2
    output[:, [2, 4]] -= (inp_dim - scaling_factor * im_dim_list[:, 1].view(-1, 1)) / 2
    output[:, 1:5] /= scaling_factor
    # Clip boxes that reached into the padding area to the image borders.
    output[:, [1, 3]] = torch.min(output[:, [1, 3]].clamp(min=0.0), im_dim_list[:, 0].view(-1, 1))
    output[:, [2, 4]] = torch.min(output[:, [2, 4]].clamp(min=0.0), im_dim_list[:, 1].view(-1, 1))
    class_load = time.time()
    print_info(widget, False, "info", "Finished output processing.")

    # ---- Draw ----------------------------------------------------------
    print_info(widget, False, "info", "Drawing boxes....")
    draw = time.time()
    images_handler = ImagesHandler(classes, output, loaded_ims, args.det, im_list, batch_size)
    images_handler.write()
    print_info(widget, False, "images_ready", images_handler.imageList)
    finished = time.time()
    print_info(widget, False, "info", "Finished drawing boxes")

    # ---- Timing summary ------------------------------------------------
    timings = [
        ("Reading addresses", finish_read_dir - read_dir),
        ("Loading network", finish_load_net - load_net),
        ("Loading batch", start_det_loop - load_batch),
        ("Detection (" + str(len(im_list)) + " images)", output_rescale - start_det_loop),
        ("Output Processing", class_load - output_rescale),
        ("Drawing Boxes", finished - draw),
        ("Average time_per_img", (finished - load_batch) / len(im_list)),
    ]
    msg = "\n\nSUMMARY"
    msg += "\n----------------------------------------------------------"
    msg += "\n{:25s}: {}".format("Task", "Time Taken (in seconds)")
    msg += "\n"
    msg += "".join("\n{:25s}: {:2.3f}".format(task, seconds) for task, seconds in timings)
    msg += "\n----------------------------------------------------------"
    print_info(widget, False, 'info', msg)

    torch.cuda.empty_cache()
    print_info(widget, False, 'finished')
help="the image to predict (default: %(default)s)") parser.add_argument("--weight", required=True, metavar="/path/to/yolov4.weights", help="the path of weight file") parser.add_argument("--save-img", metavar="predicted-img", help="the path to save predicted image") args = parser.parse_args() return args if __name__ == "__main__": args = parse_args() img: Image.Image = Image.open(args.img_file) img = img.resize((608, 608)) # C*H*W img_data = to_image(img) net = Darknet(img_data.size(0)) net.load_weights(args.weight) net.eval() with torch.no_grad(): boxes, confs = net(img_data.unsqueeze(0)) idxes_pred, boxes_pred, probs_pred = utils.post_processing(boxes, confs, 0.4, 0.6) utils.plot_box(boxes_pred, args.img_file, args.save_img)
def detect(kitti_weights='../checkpoints/best_weights_kitti.pth',
           config_path='../config/yolov3-kitti.cfg',
           class_path='../data/names.txt'):
    """Run inference on the sample images and save annotated plots.

    Every image found by ``ImageFolder("../data/samples/")`` is pushed through
    the network; one PNG per image (``0.png``, ``1.png``, ...) is written to
    ``../output``, which is created if missing.

    Args:
        kitti_weights: Path of the weights file passed to ``load_weights``.
        config_path: Yolo configuration file path.
        class_path: Path of the class-names text file.

    Returns:
        None.
    """
    img_size = 416  # network input size; also used to undo the padding below

    cuda = torch.cuda.is_available()
    os.makedirs('../output', exist_ok=True)

    # Set up model
    model = Darknet(config_path, img_size=img_size)
    model.load_weights(kitti_weights)
    if cuda:
        model.cuda()
        print("Cuda available for inference")
    model.eval()  # Set in evaluation mode

    dataloader = DataLoader(ImageFolder("../data/samples/", img_size=img_size),
                            batch_size=2, shuffle=False, num_workers=0)
    classes = load_classes(class_path)  # Extracts class labels from file
    Tensor = torch.cuda.FloatTensor if cuda else torch.FloatTensor

    imgs = []  # Stores image paths
    img_detections = []  # Stores detections for each image index

    print('data size : %d' % len(dataloader))
    print('\nPerforming object detection:')
    prev_time = time.time()
    for batch_i, (img_paths, input_imgs) in enumerate(dataloader):
        # Configure input
        input_imgs = Variable(input_imgs.type(Tensor))

        # Get detections
        with torch.no_grad():
            detections = model(input_imgs)
            # NOTE(review): 80 looks like a class count that is independent of
            # len(classes) -- confirm it matches the loaded config.
            detections = non_max_suppression(detections, 80, 0.8, 0.4)

        # Log progress
        current_time = time.time()
        inference_time = datetime.timedelta(seconds=current_time - prev_time)
        prev_time = current_time
        print('\t+ Batch %d, Inference Time: %s' % (batch_i, inference_time))

        # Save image and detections
        imgs.extend(img_paths)
        img_detections.extend(detections)

    # Bounding-box colors
    cmap = plt.get_cmap('tab10')
    colors = [cmap(i) for i in np.linspace(0, 1, 20)]

    print('\nSaving images:')
    # Iterate through images and save plot of detections
    for img_i, (path, detections) in enumerate(zip(imgs, img_detections)):
        print("(%d) Image: '%s'" % (img_i, path))

        # Load the pixels and release the file handle right away.
        with Image.open(path) as pil_image:
            img = np.array(pil_image)

        # A single figure per image; an additional plt.figure() here would
        # never be closed by the close call at the end of the iteration.
        fig, ax = plt.subplots(1)
        ax.imshow(img)

        # The amount of padding that was added
        scale = img_size / max(img.shape)
        pad_x = max(img.shape[0] - img.shape[1], 0) * scale
        pad_y = max(img.shape[1] - img.shape[0], 0) * scale
        # Image height and width after padding is removed
        unpad_h = img_size - pad_y
        unpad_w = img_size - pad_x

        # Draw bounding boxes and labels of detections
        if detections is not None:
            print(type(detections))
            print(detections.size())
            unique_labels = detections[:, -1].cpu().unique()
            n_cls_preds = len(unique_labels)
            bbox_colors = random.sample(colors, n_cls_preds)
            # One colour per predicted class, looked up by class index.
            color_by_label = {int(label): colour for label, colour in zip(unique_labels, bbox_colors)}

            for x1, y1, x2, y2, conf, cls_conf, cls_pred in detections:
                label = classes[int(cls_pred)]
                print('\t+ Label: %s, Conf: %.5f' % (label, cls_conf.item()))

                # Rescale coordinates to original dimensions
                box_h = int(((y2 - y1) / unpad_h) * (img.shape[0]))
                box_w = int(((x2 - x1) / unpad_w) * (img.shape[1]))
                y1 = int(((y1 - pad_y // 2) / unpad_h) * (img.shape[0]))
                x1 = int(((x1 - pad_x // 2) / unpad_w) * (img.shape[1]))

                color = color_by_label[int(cls_pred)]
                # Create a Rectangle patch and add it to the plot
                bbox = patches.Rectangle((x1, y1), box_w, box_h, linewidth=2,
                                         edgecolor=color, facecolor='none')
                ax.add_patch(bbox)
                # Add label
                plt.text(x1, y1 - 30, s=label + ' ' + str('%.4f' % cls_conf.item()),
                         color='white', verticalalignment='top',
                         bbox={'color': color, 'pad': 0})

        # Save generated image with detections
        plt.axis('off')
        plt.gca().xaxis.set_major_locator(NullLocator())
        plt.gca().yaxis.set_major_locator(NullLocator())
        plt.savefig('../output/%d.png' % (img_i), bbox_inches='tight', pad_inches=0.0)
        plt.close(fig)
class ReadFramesThread:
    """Read frames from a video source, run detection on them and queue the results.

    A background thread (see ``start``) reads frames from ``cv2.VideoCapture``,
    preprocesses them with ``prep_image`` and, every ``batch_size`` frames,
    runs the network and pushes one ``ImageHandler`` per frame onto
    ``Q_processed``. Optionally a second network re-classifies each detection
    and a SORT / Deep SORT tracker post-processes the detections.
    """

    def __init__(self, path, args, with_tracking, widget, queue_size=3000):
        """Open the video source and load the network(s).

        Args:
            path: Video file path, or an int selecting a web cam.
            args: Settings object; the attributes read here are ``reso``,
                ``bs``, ``names``, ``confidence``, ``nms_thresh``,
                ``is_classifier``, ``cfg``, ``weights`` and, in classifier
                mode, the ``classifier_*`` counterparts.
            with_tracking: ``"sort"``, ``"deep_sort"`` or anything else for no
                tracking.
            widget: Object forwarded to ``print_info``; its ``obj.pauseMutex``
                and ``obj.pauseCond`` are used while paused.
            queue_size: Capacity of each internal queue.
        """
        self.stream = cv2.VideoCapture(path)
        self.widget = widget
        self.tracking = with_tracking
        # A VideoCapture object is always truthy; isOpened() tells whether the
        # source could actually be opened.
        if not self.stream.isOpened():
            if isinstance(path, int):
                print_info(widget, True, "error", f"Error opening web cam on {path}")
            else:
                print_info(widget, True, "error", f"Error opening video file {path}")

        # Flags polled by the reader loop
        self.stopped = False
        self.canceled = False
        self.paused = False
        self.ready = False

        # Q holds preprocessed tensors, imread the matching raw frames and
        # Q_processed the finished ImageHandler objects.
        self.Q = Queue(maxsize=queue_size)
        self.imread = Queue(maxsize=queue_size)
        self.Q_processed = Queue(maxsize=queue_size)
        # Number of the batch handled last; -1 until the first one is processed.
        self._batch_nr = -1

        self.inp_dim = int(args.reso)
        self.batch_size = int(args.bs)
        self.names_file = args.names
        self.confidence = float(args.confidence)
        self.nms_thresh = float(args.nms_thresh)
        self.is_classifier = args.is_classifier
        self.classes = load_classes(self.names_file)
        self.num_classes = len(self.classes)
        self.model = None
        self.model_classifier = None

        if self.is_classifier:
            # In this mode args.classifier_* configure the detection network
            # and the plain args.* configure the classification network.
            print_info(widget, False, "info", "Loading network for detection.....", -1)
            self.model = Darknet(args.classifier_cfg)
            self.model.load_weights(args.classifier_weights)
            print_info(widget, False, "info", "Network for detection successfully loaded", 0)

            print_info(widget, False, "info", "Loading network for classification.....", -1)
            self.model_classifier = Darknet(args.cfg)
            self.model_classifier.load_weights(args.weights)
            print_info(widget, False, "info", "Network for classification successfully loaded", 0)
            self.model_classifier.net_info["height"] = args.reso
            self.inp_dim = int(self.model_classifier.net_info["height"])

            # If there's a GPU available, put the model on GPU
            self.cuda = torch.cuda.is_available()
            if self.cuda:
                self.model_classifier.cuda()
            # Set the model in evaluation mode
            self.model_classifier.eval()

            # Keep the settings read so far for the classification stage ...
            self.classifier_confidence = self.confidence
            self.classifier_nms_thesh = self.nms_thresh
            self.classifier_classes = self.classes
            self.classifier_num_classes = self.num_classes
            self.classifier_names_file = self.names_file
            self.classifier_inp_dim = self.inp_dim

            # ... and switch the primary settings over to the detection stage.
            self.inp_dim = int(args.classifier_inp_dim)
            self.confidence = float(args.classifier_confidence)
            self.nms_thresh = float(args.classifier_nms_thresh)
            self.names_file = args.classifier_names
            self.classes = load_classes(self.names_file)
            self.num_classes = len(self.classes)
        else:
            print_info(widget, False, "info", "Loading network.....", -1)
            self.model = Darknet(args.cfg)
            self.model.load_weights(args.weights)
            print_info(widget, False, "info", "Network successfully loaded", 0)

        self.model.net_info["height"] = self.inp_dim
        assert self.inp_dim % 32 == 0
        assert self.inp_dim > 32

        # If there's a GPU available, put the model on GPU
        self.cuda = torch.cuda.is_available()
        if self.cuda:
            self.model.cuda()
        # Set the model in evaluation mode
        self.model.eval()

        # if tracking selected, initialize the tracker
        self.mot_tracking = None
        if self.tracking == "sort":
            self.mot_tracking = Sort(max_age=30, min_hits=3)
        elif self.tracking == "deep_sort":
            print_info(widget, False, "info", "Loading Deep Sort model ...", -1)
            self.mot_tracking = DeepSort()
            print_info(widget, False, "info", "Deep Sort model loaded", -1)

    def start(self):
        """Start the reader thread and return ``self``."""
        t = Thread(target=self.update, args=())
        # t.daemon = True
        t.start()
        return self

    def update(self):
        """Reader loop: grab frames, queue them and process every full batch.

        The loop ends when ``stop`` or ``cancel`` is requested or when the
        source runs out of frames. On end of stream the remaining partial
        batch is processed before ``ready`` is set.
        """
        frames = 0
        start = time.time()
        print_info(self.widget, False, "info", "Began capturing", -2)

        while True:
            if self.stopped:
                break

            if self.canceled:
                current_time = time.time()
                print_info(self.widget, False, "info", "Canceled processing", current_time - start)
                return

            if self.paused:
                # Block until the widget's condition is signalled.
                self.widget.obj.pauseMutex.lock()
                self.widget.obj.pauseCond.wait(self.widget.obj.pauseMutex)
                self.widget.obj.pauseMutex.unlock()
                self.paused = False

            # only read when the queue has room
            if self.Q.full():
                continue

            (grabbed, frame) = self.stream.read()
            if not grabbed:
                # End of stream: flush the frames that did not fill a whole
                # batch, then announce completion.
                self.stop()
                if not self.Q.empty():
                    self.process_frames()
                self.ready = True
                return

            self.Q.put(prep_image(frame, self.inp_dim))
            self.imread.put(frame)
            frames += 1

            current_time = time.time()
            msg = " FPS of the video is {:5.4f}".format(frames / (current_time - start))
            print_info(self.widget, False, "info", msg, current_time - start)

            if frames % self.batch_size == 0:
                self.process_frames()

        # Stopped from outside: handle whatever is still queued.
        if not self.Q.empty():
            self.process_frames()

    def read(self):
        """Return the next preprocessed frame in the queue (blocks if empty)."""
        return self.Q.get()

    def more(self):
        """Return True if there are still preprocessed frames in the queue."""
        return self.Q.qsize() > 0

    def stop(self):
        """Ask the reader loop to stop."""
        self.stopped = True

    def cancel(self):
        """Ask the reader loop to abort without processing queued frames."""
        self.canceled = True

    def pause(self):
        """Ask the reader loop to wait on the widget's pause condition."""
        self.paused = True

    def has_batch(self):
        """Return True if a full batch, or a final partial batch, is queued."""
        queued = self.Q.qsize()
        return queued >= self.batch_size or (queued > 0 and self.stopped)

    def get_batch(self):
        """Pop the next batch from the queues.

        Returns:
            ``(batch, im_dim_list, frames)`` where ``batch`` is the
            concatenation of the queued tensors, ``im_dim_list`` has one
            ``(w, h, w, h)`` row per frame and ``frames`` is the list of raw
            frames; ``(False, False, False)`` when no batch is available.
        """
        if not self.has_batch():
            return False, False, False

        # Never ask for more items than are queued: after a stop the last
        # batch may be short and Queue.get() would block forever.
        count = min(self.batch_size, self.Q.qsize())
        tensors = []
        im_dim_list = []
        imread_list = []
        for _ in range(count):
            tensors.append(self.Q.get())
            img = self.imread.get()
            im_dim_list.append((img.shape[1], img.shape[0]))
            imread_list.append(img)
        if not tensors:
            return False, False, False

        res = torch.cat(tensors)
        im_dim_list = torch.FloatTensor(im_dim_list).repeat(1, 2)
        return res, im_dim_list, imread_list

    def _rescale_to_image(self, frame, im_dim):
        """Map box columns 1-4 of ``frame`` from network input space to image space.

        ``im_dim`` is the ``(w, h, w, h)`` row of the image. ``frame`` is
        modified in place and returned.
        """
        im_dim = im_dim.repeat(frame.size(0), 1)
        # The scale must be derived from the actual network input size, the
        # same value the padding below is computed from.
        scaling_factor = torch.min(self.inp_dim / im_dim, 1)[0].view(-1, 1)
        frame[:, [1, 3]] -= (self.inp_dim - scaling_factor * im_dim[:, 0].view(-1, 1)) / 2
        frame[:, [2, 4]] -= (self.inp_dim - scaling_factor * im_dim[:, 1].view(-1, 1)) / 2
        frame[:, 1:5] /= scaling_factor
        # Clip boxes to the image borders.
        frame[:, [1, 3]] = torch.min(frame[:, [1, 3]].clamp(min=0.0), im_dim[:, 0].view(-1, 1))
        frame[:, [2, 4]] = torch.min(frame[:, [2, 4]].clamp(min=0.0), im_dim[:, 1].view(-1, 1))
        return frame

    def _apply_tracking(self, image, frame):
        """Run the configured tracker on ``frame`` and return its output."""
        if self.tracking == "sort":
            if self.cuda:
                frame = frame.cpu()
            frame = self.mot_tracking.update(frame)
            if self.cuda:
                frame = torch.from_numpy(frame).cuda()
        elif self.tracking == "deep_sort":
            if self.cuda:
                frame = frame.cpu()
            tracker, detections_class = self.mot_tracking.update(image, frame)
            rows = []
            for track in tracker.tracks:
                if not track.is_confirmed() or track.time_since_update > 1:
                    continue
                bbox = track.to_tlbr()  # corrected/predicted bounding box
                id_num = int(track.track_id)  # ID of this particular track
                rows.append(
                    np.concatenate(([id_num + 1], bbox,
                                    [track.conf_score, track.class_score, track.cid])).reshape(1, -1))
            if len(rows) > 0:
                frame = np.concatenate(rows)
                if self.cuda:
                    frame = torch.from_numpy(frame).cuda()
            else:
                frame = torch.empty((0, 8))
        return frame

    def process_frames(self):
        """Run detection on the next queued batch and queue one result per frame."""
        batch, im_dims, imread = self.get_batch()
        if not imread:
            return

        # Count batches across calls so that frame numbers keep increasing
        # instead of restarting at 1 for every batch.
        self._batch_nr += 1
        batch_nr = self._batch_nr

        if self.cuda:
            im_dims = im_dims.cuda()
            batch = batch.cuda()
        with torch.no_grad():
            output = self.model(batch, self.cuda)

        for frame_id in range(np.size(output, 0)):
            nr_frame = self.batch_size * batch_nr + frame_id + 1
            frame = write_results(output[frame_id].unsqueeze(0), self.confidence,
                                  self.num_classes, nms_conf=self.nms_thresh)
            # NOTE(review): write_results is defined elsewhere; an int result is
            # presumed to mean "no detections" -- treat it as an empty frame.
            if isinstance(frame, int):
                frame = torch.empty((0, 8))

            if np.size(frame, 0) > 0:
                frame = self._rescale_to_image(frame, im_dims[frame_id])
                if self.is_classifier:
                    frame = self.apply_classifier_model(imread[frame_id], frame)
                frame = self._apply_tracking(imread[frame_id], frame)

            image_handler = ImageHandler(nr_frame, batch_nr, f"frame{nr_frame}",
                                         imread[frame_id], self.tracking)
            # Frames without detections are queued undecorated.
            if np.size(frame, 0) > 0:
                if self.is_classifier:
                    image_handler.write(frame, self.classifier_classes)
                else:
                    image_handler.write(frame, self.classes)
            self.Q_processed.put(image_handler)

    def get_image(self):
        """Return the next processed image handler (blocks if none is queued)."""
        return self.Q_processed.get()

    def has_images(self):
        """Return True if at least one processed image handler is queued."""
        return not self.Q_processed.empty()

    def apply_classifier_model(self, imread, frame):
        """Re-classify every detection in ``frame`` with the classifier network.

        Each box is cropped from ``imread`` and passed through
        ``model_classifier``; columns 6-7 of the detection are replaced by the
        best classifier result. Detections that cannot be cropped or that the
        classifier rejects are removed.

        Args:
            imread: Raw frame the detections belong to.
            frame: Detection rows; columns 1-4 are read as ``xmin, ymin, xmax,
                ymax`` and negative coordinates are zeroed in place.

        Returns:
            The rows of ``frame`` that received a classifier result.
        """
        img_h, img_w = imread.shape[:2]

        crops = []
        cropped_rows = []  # row index in `frame` of each successful crop
        for row, box in enumerate(frame[:, 1:5]):
            box.clamp_(min=0)  # `box` is a view, so this updates `frame`
            xmin, ymin, xmax, ymax = box
            xmin = int(min(xmin, img_w))
            ymin = int(min(ymin, img_h))
            xmax = int(xmax)
            ymax = int(ymax)
            try:
                crop = prep_image(imread[ymin:ymax, xmin:xmax, :], self.classifier_inp_dim)
            except Exception:
                # Presumably a degenerate (empty) crop; drop this detection.
                continue
            crops.append(crop)
            cropped_rows.append(row)

        if not crops:
            return frame[:0]

        # Mark detections without a crop for removal so classifier outputs
        # stay aligned with the rows they were computed for.
        usable = set(cropped_rows)
        for row in range(frame.size(0)):
            if row not in usable:
                frame[row, 6] = -1

        crops = torch.cat(crops)
        if self.cuda:
            crops = crops.cuda()
        with torch.no_grad():
            output = self.model_classifier(crops, self.cuda)

        for row, prediction in zip(cropped_rows, output):
            new_det = write_results(prediction.unsqueeze(0), self.classifier_confidence,
                                    self.classifier_num_classes,
                                    nms_conf=self.classifier_nms_thesh)
            if not isinstance(new_det, int) and np.size(new_det, 0) > 0:
                index = torch.argmax(new_det[:, 6])
                frame[row, 6:8] = new_det[index, 6:8]
            else:
                frame[row, 6] = -1

        return frame[frame[:, 6] >= 0]
left_over = 0 if (len(loaded_imgs) % batch_size): left_over = 1 num_batches = (len(loaded_imgs) // batch_size) + left_over batches = [ det_imgs[i * batch_size:min((i + 1) * batch_size, len(loaded_imgs))] for i in range(num_batches) ] batches = [torch.cat(batch, 0) for batch in batches] net = Darknet() net = net.cuda() net.load_weights() net.eval() write = 0 def put_rectangle(x, results): c1 = tuple(x[1:3].int()) c2 = tuple(x[3:5].int()) img = results[int(x[0])] cls = int(x[-1]) label = "{0}".format(classes[cls]) cv2.rectangle(img, c1, c2, [255, 0, 0], 1) t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 1, 1)[0] c2 = c1[0] + t_size[0] + 3, c1[1] + t_size[1] + 4 cv2.rectangle(img, c1, c2, [0, 255, 0], -1)
def main(train_path="../data/train/images/",
         val_path="../data/train/images/",
         labels_path="../data/train/yolo_labels/",
         weights_path="../checkpoints/",
         preload_weights_file="darknet53.conv.74",
         output_path="../output",
         yolo_config_file="../config/yolov3-kitti.cfg",
         fraction=1,
         learning_rate=1e-3,
         weight_decay=1e-4,
         batch_size=2,
         epochs=30,
         freeze_struct=[True, 5]):
    """
    This is the point of entry to the neural network program.
    All the training history will be saved as a csv in the output path

    Args
        train_path (string): Directory containing the training images
        val_path (string): Directory containing the val images
        labels_path (string): Directory containing the yolo format labels for data
        weights_path (string): Directory containing the weights (new weights for this program will also be added here)
        preload_weights_file (string): Name of preload weights file
        output_path (string): Directory to store the training history outputs as csv
        yolo_config_file (string): file path of yolo configuration file
        fraction (float): fraction of data to use for training
        learning_rate (float): initial learning rate
        weight_decay (float): weight decay value
        batch_size (int): batch_size for both training and validation
        epochs (int): maximum number of epochs to train the model
        freeze_struct (list): [bool, int] indicating whether to freeze the Darknet backbone and until which epoch should it be frozen
            (only read, never modified)

    Returns
        None
    """
    # Set up checkpoints path
    checkpoints_path = weights_path

    # Create required directories
    os.makedirs(output_path, exist_ok=True)
    os.makedirs(checkpoints_path, exist_ok=True)

    # Set up cuda
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    print("Available device = ", device)
    # The tensor type has to follow the selected device; the CUDA type is
    # only valid when a GPU is actually present.
    tensor_type = torch.cuda.FloatTensor if use_cuda else torch.FloatTensor

    # Create model and load pretrained darknet weights
    model = Darknet(yolo_config_file)
    print("Loading imagenet weights to darknet")
    model.load_weights(os.path.join(weights_path, preload_weights_file))
    model.to(device)

    # Create datasets
    train_dataset = KITTI2D(train_path, labels_path, fraction=fraction, train=True)
    valid_dataset = KITTI2D(val_path, labels_path, fraction=fraction, train=False)

    # Create dataloaders
    train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    valid_dataloader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False)

    # Create optimizers (only over parameters that are not frozen)
    optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()),
                                 lr=learning_rate, weight_decay=weight_decay)
    lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, 10)

    # Create log csv files; the with-block closes them even if training fails
    train_log_path = os.path.join(output_path, "train_results.csv")
    valid_log_path = os.path.join(output_path, "valid_results.csv")
    with open(train_log_path, "w", newline="") as train_log_file, \
            open(valid_log_path, "w", newline="") as valid_log_file:
        train_csv = csv.writer(train_log_file)
        valid_csv = csv.writer(valid_log_file)

        print("Starting to train yolov3 model...")

        # Train model here
        train_model(model,
                    device,
                    optimizer,
                    lr_scheduler,
                    train_dataloader,
                    valid_dataloader,
                    train_csv,
                    valid_csv,
                    weights_path,
                    max_epochs=epochs,
                    tensor_type=tensor_type,
                    update_gradient_samples=1,
                    freeze_darknet=freeze_struct[0],
                    freeze_epoch=freeze_struct[1])

    print("Training completed")