def get_single_img_map(cfg_path, det, label_csv_mame, params):
    """Evaluate mAP/AP per image for every image listed in a label CSV.

    Builds a YOLO v3 model from ``cfg_path``, runs detection one image at a
    time (batch_size=1) over ``label_csv_mame``, and records each image's
    mAP and per-class AP (via ``simgle_img_map``) into a DataFrame indexed
    by image path.

    Args:
        cfg_path: path to the darknet cfg file describing the network.
        det: unused in this body -- presumably an output directory kept for
            interface parity with ``detection()``; verify with callers.
        label_csv_mame: path to the CSV of image paths/labels consumed by
            ``CustDataCV``.
        params: dict of model/run parameters ('height', 'cuda', ...).

    Returns:
        pandas.DataFrame with one row per image and columns ['mAP', 'ap'].
    """
    map_frame = pd.DataFrame(columns=['mAP', 'ap'])
    # yolo v3 down size the imput images 32 strides, therefore the input
    # needs to be a multiplier of 32 and > 32
    assert params["height"] % 32 == 0
    assert params["height"] > 32
    blocks = parse_cfg(cfg_path)
    model = yolo_v3(params, blocks)
    # model.load_weights("../4Weights/yolov3.weights")
    if params['cuda']:
        model = model.cuda()
    # inference only: freeze all weights so no gradients are tracked
    for parameter in model.parameters():
        parameter.requires_grad = False
    # put to evaluation mode so that gradient wont be calculated
    model.eval()
    transform = transforms.Compose([
        LetterBoxImage_cv([params['height'], params['height']]),
        ImgToTensorCv(),
        # ImageNet channel statistics -- assumes the backbone was trained
        # with this normalization (TODO confirm)
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
    data = CustDataCV(label_csv_mame, transform=transform,
                      detection_phase=True)
    # batch_size=1 so each loop iteration scores exactly one image
    data_loader = DataLoader(data, shuffle=False, batch_size=1,
                             collate_fn=my_collate_detection,
                             num_workers=0, worker_init_fn=worker_init_fn)
    for original_imgs, images, imlist, im_dim_list, labels in data_loader:
        if model.params['cuda']:
            images = images.cuda()
        prediction = model(images)
        output = filter_results(prediction, params)
        mAP, ap = simgle_img_map(model, output, labels)
        # imlist[-1] is the (single) image path of this batch
        map_frame.loc[imlist[-1], 'mAP'] = mAP
        map_frame.loc[imlist[-1], 'ap'] = ap
    return map_frame
# %% for image_index, batch in enumerate(im_batches): # imagefilename = list_images_current[image_index] # list_image_index_current # load the image start = time.time() if cuda: batch = batch.cuda() prediction = model(batch, cuda) # prediction = filter_results(prediction, confidence, nms_thesh) prediction = filter_results(prediction, confidence, num_classes, nms_thesh) end = time.time() if type(prediction) == int: for im_num, image in enumerate( list_images_current[image_index * batch_size:min( (image_index + 1) * batch_size, len(list_images_current))]): im_id = image_index * batch_size + im_num print("{0:20s} predicted in {1:6.3f} seconds".format( image.split("/")[-1], (end - start) / batch_size)) print("{0:20s} {1:s}".format("Objects Detected:", "")) print( "----------------------------------------------------------"
start = time.time() while cap.isOpened(): ret, frame = cap.read() if ret: img = prep_image(frame, inp_dim) # cv2.imshow("a", frame) im_dim = frame.shape[1], frame.shape[0] im_dim = torch.FloatTensor(im_dim).repeat(1, 2) if cuda: im_dim = im_dim.cuda() img = img.cuda() output = model(img, cuda) output = filter_results(output, confidence, num_classes, nms_thesh) if type(output) == int: frames += 1 print("FPS of the video is {:5.4f}".format(frames / (time.time() - start))) cv2.imshow("frame", frame) key = cv2.waitKey(1) if key & 0xFF == ord('q'): break continue im_dim = im_dim.repeat(output.size(0), 1) scaling_factor = torch.min(inp_dim / im_dim, 1)[0].view(-1, 1) output[:, [1, 3]] -= (inp_dim -
def run_webcam(cfg_path, param_path, colors, count_delay, nms_thesh=False,
               confidence=False, count_time=10):
    """Run live YOLO v3 detection on webcam 0 and benchmark the frame rate.

    Frames are read, mirrored, letter-boxed to the network input size and
    fed through the model; detections are drawn on the frame and displayed.
    FPS sampling starts ``count_delay`` seconds after start-up and runs for
    ``count_time`` seconds, after which the mean FPS is returned.  Pressing
    ESC exits early.

    Args:
        cfg_path: path to the darknet cfg file describing the network.
        param_path: path to a JSON file of model/run parameters.
        colors: unused in this body -- presumably consumed by ``write``;
            kept for interface compatibility (verify).
        count_delay: seconds to wait before FPS sampling begins.
        nms_thesh: optional NMS threshold override (falsy -> use params).
        confidence: optional confidence override (falsy -> use params).
        count_time: duration (seconds) of the FPS sampling window.

    Returns:
        float: mean FPS over the sampling window, or None if the loop is
        exited (ESC / stream failure) before the window completes.
    """
    with open(param_path) as fp:
        params = json.load(fp)
    if type(params['anchors']) == list:
        params['anchors'] = np.array(params['anchors'])
    blocks = parse_cfg(cfg_path)
    model = yolo_v3(params, blocks)
    # inference only: freeze weights so no gradients are tracked
    for parameter in model.parameters():
        parameter.requires_grad = False
    if params['cuda']:
        model = model.cuda()
    transform = transforms.Compose([
        LetterboxImage_cv([params['height'], params['height']]),
        ImgToTensorCv(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
    # yolo v3 downsamples with stride 32: input must be a multiple of 32
    assert params['height'] % 32 == 0
    assert params['height'] > 32
    cap = cv2.VideoCapture(0)
    fps_list = []
    add = False  # True once the FPS sampling window has started
    print(f"this is height {params['height']}")
    if params['height'] > 480:
        # 720 p
        cap.set(3, 1280)
    else:
        # 480 p
        cap.set(3, 800)
    print(f"width :{cap.get(3)}")
    print(f"height :{cap.get(4)}")
    assert cap.isOpened(), 'Cannot capture source'
    start = time.time()
    count_start_time = start + count_delay
    # NOTE(review): these overrides land in locals only, but filter_results
    # below reads thresholds from `params` -- confirm whether they should be
    # written into params['confidence'] / params['nms_thesh'] instead.
    if not confidence:
        confidence = params['confidence']
    if not nms_thesh:
        nms_thesh = params['nms_thesh']
    while cap.isOpened():
        ret, frame = cap.read()
        if ret:
            # BUGFIX: mirror only after a successful read; the original
            # flipped before checking `ret`, crashing on a None frame.
            frame = cv2.flip(frame, 1)
            # we need to unsqueeze to add batch size dimension to tensor in
            # order to fit into pytorch data format
            img = transform(frame).unsqueeze(0)
            im_dim = frame.shape[1], frame.shape[0]
            im_dim = torch.FloatTensor(im_dim).repeat(1, 2)
            if params['cuda']:
                im_dim = im_dim.cuda()
                img = img.cuda()
            output = model(img)
            output = filter_results(output, params)
            # when there is no detection filter_results returns an int
            if type(output) == int:
                cv2.imshow("frame", frame)
                fps = (1 / (time.time() - start))
                # print(f"FPS of the video is {fps:5.4f}")
                if add:
                    fps_list.append(fps)
                    if (time.time() - count_start_time) > count_time:
                        print(f"avg_fps: {np.mean(fps_list):5.4f}")
                        cv2.destroyAllWindows()
                        cap.release()
                        return np.mean(fps_list)
                elif time.time() > count_start_time:
                    # sampling window begins now
                    count_start_time = time.time()
                    fps_list.append(fps)
                    add = True
                start = time.time()
                key = cv2.waitKey(1) & 0xFF
                if key == 27:  # ESC
                    break
                continue
            # map boxes from letterboxed network coords back to frame coords
            im_dim = im_dim.repeat(output.size(0), 1)
            scaling_factor = torch.min(params['height'] / im_dim,
                                       1)[0].view(-1, 1)
            output[:, [1, 3]] -= (params['height'] - scaling_factor *
                                  im_dim[:, 0].view(-1, 1)) / 2
            output[:, [2, 4]] -= (params['height'] - scaling_factor *
                                  im_dim[:, 1].view(-1, 1)) / 2
            output[:, 1:5] /= scaling_factor
            # clamp every box to the frame boundary
            for i in range(output.shape[0]):
                output[i, [1, 3]] = torch.clamp(output[i, [1, 3]], 0.0,
                                                im_dim[i, 0])
                output[i, [2, 4]] = torch.clamp(output[i, [2, 4]], 0.0,
                                                im_dim[i, 1])
            list(map(lambda x: write(x, frame, params['classes']), output))
            cv2.imshow("frame", frame)
            key = cv2.waitKey(1) & 0xFF
            fps = 1 / (time.time() - start)
            # print(f"FPS of the video is {fps:5.4f}")
            if add:
                fps_list.append(fps)
                if (time.time() - count_start_time) > count_time:
                    print(f"avg_fps: {np.mean(fps_list):5.4f}")
                    cv2.destroyAllWindows()
                    cap.release()
                    return np.mean(fps_list)
            elif time.time() > count_start_time:
                count_start_time = time.time()
                fps_list.append(fps)
                add = True
            start = time.time()
            if key == 27:  # ESC
                break
        else:
            # camera stream failed / ended
            break
    # BUGFIX: also release the camera and close the window on the ESC /
    # stream-failure exits (original cleaned up only on the timed returns).
    cap.release()
    cv2.destroyAllWindows()
def get_map(model, dataloader, cuda, conf_list, nms_conf, classes,
            train=False):
    """Compute mAP/AP over a dataloader, averaged across a confidence list.

    For each object-confidence value in ``conf_list`` the detections of
    every image are filtered, grouped per class, compared with the ground
    truth via ``compute_map``, and the AP/mAP results are averaged over the
    confidence list.

    Args:
        model: YOLO v3 model (exposes ``num_classes``); left in train mode
            on return.
        dataloader: yields dicts with "image" (batched tensor) and "label".
        cuda: move images to GPU when True.
        conf_list: iterable of object-confidence thresholds to evaluate.
        nms_conf: NMS threshold passed through to filtering/compute_map.
        classes: class names, used as DataFrame index rows when not train.
        train: when True, skip building the per-class result DataFrame.

    Returns:
        (sum_ap, sum_map, ap, mAP) when ``train`` is True, otherwise
        (sum_ap, sum_map, ap, mAP, map_frame).
    """
    if not train:
        rows = classes + ["mAP"]
        map_frame = pd.DataFrame(index=rows, columns=conf_list)
    else:
        map_frame = None
    model.eval()
    num_classes = model.num_classes
    # all_detections[conf_index][image][class] -> array of predicted boxes
    all_detections = []
    len_conf_list = len(conf_list)
    for _ in range(len_conf_list):
        all_detections.append([])
    # all_annotations[image][class] -> array of ground-truth boxes
    all_annotations = []
    sum_map = 0
    with torch.no_grad():
        for samples in dataloader:
            if cuda:
                image, labels = samples["image"].to('cuda'), samples["label"]
            else:
                image, labels = samples["image"], samples["label"]
            img_size = image.shape[-1]
            outputs = model(image, cuda)
            for conf_index, confidence in enumerate(conf_list):
                for img in outputs:
                    all_detections[conf_index].append(
                        [np.array([]) for _ in range(num_classes)])
                    outputs_ = filter_results(img.unsqueeze(0), confidence,
                                              num_classes, nms_conf)
                    # BUGFIX: was `outputs_ is not 0` -- identity comparison
                    # with an int literal (SyntaxWarning, CPython-specific).
                    # filter_results returns the int 0 when nothing is kept.
                    if not isinstance(outputs_, int):
                        # Get predicted boxes, confidence scores and labels
                        pred_boxes = outputs_[:, 1:6].cpu().numpy()
                        scores = outputs_[:, 5].cpu().numpy()
                        pred_labels = outputs_[:, 7].cpu().numpy()
                        # Order by confidence
                        sort_i = np.argsort(scores)
                        pred_labels = pred_labels[sort_i]
                        pred_boxes = pred_boxes[sort_i]
                        for c in range(num_classes):
                            all_detections[conf_index][-1][c] = pred_boxes[
                                pred_labels == c]
            # get all labels for a batch
            for label_ in labels:
                all_annotations.append(
                    [np.array([]) for _ in range(num_classes)])
                if any(label_[:, -1] > 0):
                    annotation_labels = label_[label_[:, -1] > 0, 0]
                    _annotation_boxes = label_[label_[:, -1] > 0, 1:]
                    # Reformat from (cx, cy, w, h) to (x1, y1, x2, y2) and
                    # rescale to image dimensions
                    annotation_boxes = np.empty_like(_annotation_boxes)
                    annotation_boxes[:, 0] = _annotation_boxes[:, 0] - \
                        _annotation_boxes[:, 2] / 2
                    annotation_boxes[:, 1] = _annotation_boxes[:, 1] - \
                        _annotation_boxes[:, 3] / 2
                    annotation_boxes[:, 2] = _annotation_boxes[:, 0] + \
                        _annotation_boxes[:, 2] / 2
                    annotation_boxes[:, 3] = _annotation_boxes[:, 1] + \
                        _annotation_boxes[:, 3] / 2
                    annotation_boxes *= img_size
                    for label in range(num_classes):
                        all_annotations[-1][label] = annotation_boxes[
                            annotation_labels == label, :]
    for conf_index, confidence in enumerate(conf_list):
        results = compute_map(num_classes, classes, all_detections,
                              all_annotations, conf_index, confidence,
                              nms_conf, map_frame, train)
        if conf_index == 0:
            sum_ap = results[1]
        else:
            sum_ap = [a + b for a, b in zip(sum_ap, results[1])]
        sum_map += results[0]
    model.train()
    # average the per-class AP and the mAP over the confidence list
    ap = [x / len_conf_list for x in sum_ap]
    mAP = sum_map / len_conf_list
    if train:
        return sum_ap, sum_map, ap, mAP
    else:
        return sum_ap, sum_map, ap, mAP, map_frame
def get_map(model, dataloader, train=False, loop_conf=False,
            confidence=False):
    """Sweep object-confidence thresholds and return the best mAP found.

    Detections for every image are collected once per confidence value in
    the sweep, compared against ground truth via ``compute_map``, and the
    confidence giving the highest mAP is reported.  Results at
    ``model.params['specific_conf']`` are also captured when that value is
    part of the sweep.

    Args:
        model: YOLO v3 model; reads ``model.params`` ('cuda', 'classes',
            'num_classes', 'conf_list', 'specific_conf', ...) and mutates
            ``model.params['confidence']`` during the sweep.  Left in train
            mode on return.
        dataloader: yields dicts with "image" (batched tensor) and "label".
        train: when True, skip building the per-class result DataFrame.
        loop_conf: when truthy (and ``confidence`` falsy), sweep
            ``model.params['conf_list']``; otherwise only 'specific_conf'.
        confidence: optional explicit list of thresholds to sweep.

    Returns:
        (best_map, best_ap, best_conf, specific_conf_map, specific_conf_ap)
        when ``train`` is True, plus ``map_frame`` appended otherwise.
    """
    actual_num_labels = 0
    # resolve which confidence values to sweep
    if confidence:
        loop_conf = confidence
    elif loop_conf:
        loop_conf = model.params['conf_list']
    else:
        loop_conf = [model.params['specific_conf']]
    if not train:
        rows = model.params['classes'] + ["mAP"]
        map_frame = pd.DataFrame(index=rows, columns=loop_conf)
    else:
        map_frame = None
    model.eval()
    num_classes = model.params['num_classes']
    # all_detections[conf_index][image][class] -> predicted boxes
    all_detections = []
    specific_conf_map = None
    specific_conf_ap = None
    len_conf_list = len(loop_conf)
    for _ in range(len_conf_list):
        all_detections.append([])
    # all_annotations[image][class] -> ground-truth boxes
    all_annotations = []
    with torch.no_grad():
        for samples in dataloader:
            if model.params['cuda']:
                image, labels = samples["image"].to('cuda'), samples["label"]
            else:
                image, labels = samples["image"], samples["label"]
            img_size = image.shape[-1]
            outputs = model(image)
            for conf_index, confidence in enumerate(loop_conf):
                # gets output at each object confidence
                model.params['confidence'] = confidence
                for img in outputs:
                    all_detections[conf_index].append(
                        [np.array([]) for _ in range(num_classes)])
                    outputs_ = filter_results(img.unsqueeze(0), model.params)
                    # In our model if no results it outputs int 0.
                    # BUGFIX: was `outputs_ is not 0` -- identity comparison
                    # with an int literal (SyntaxWarning, CPython-specific).
                    if not isinstance(outputs_, int):
                        # Get predicted boxes, confidence scores and labels
                        pred_boxes = outputs_[:, 1:6].cpu().numpy()
                        scores = outputs_[:, 5].cpu().numpy()
                        pred_labels = outputs_[:, 7].cpu().numpy()
                        # Order by confidence
                        sort_i = np.argsort(scores)
                        pred_labels = pred_labels[sort_i]
                        pred_boxes = pred_boxes[sort_i]
                        for c in range(num_classes):
                            all_detections[conf_index][-1][c] = pred_boxes[
                                pred_labels == c]
            # get all labels for a batch
            for label_ in labels:
                all_annotations.append(
                    [np.array([]) for _ in range(num_classes)])
                if any(label_[:, -1] > 0):
                    annotation_labels = label_[label_[:, -1] > 0, 0]
                    _annotation_boxes = label_[label_[:, -1] > 0, 1:]
                    num_labels = len(np.unique(annotation_labels))
                    # Reformat from (cx, cy, w, h) to (x1, y1, x2, y2) and
                    # rescale to image dim
                    annotation_boxes = np.empty_like(_annotation_boxes)
                    annotation_boxes[:, 0] = _annotation_boxes[:, 0] - \
                        _annotation_boxes[:, 2] / 2
                    annotation_boxes[:, 1] = _annotation_boxes[:, 1] - \
                        _annotation_boxes[:, 3] / 2
                    annotation_boxes[:, 2] = _annotation_boxes[:, 0] + \
                        _annotation_boxes[:, 2] / 2
                    annotation_boxes[:, 3] = _annotation_boxes[:, 1] + \
                        _annotation_boxes[:, 3] / 2
                    annotation_boxes *= img_size
                    for label in range(num_classes):
                        all_annotations[-1][label] = annotation_boxes[
                            annotation_labels == label, :]
                    # track the largest number of distinct classes seen in
                    # any single image's annotations
                    actual_num_labels = np.max([actual_num_labels,
                                                num_labels])
    for conf_index, confidence in enumerate(loop_conf):
        model.params['confidence'] = confidence
        print(f"Running for object confidence : {confidence}")
        # if train it results consists mAP, average_precisions map_frame
        # else: mAP, average_precisions
        print(f"actual_num_labels : {actual_num_labels}")
        results = compute_map(all_detections, all_annotations, conf_index,
                              map_frame, train, actual_num_labels,
                              model.params)
        if conf_index == 0:
            best_map = results[0]
            best_ap = results[1]
            best_conf = confidence
        else:
            if results[0] > best_map:
                best_map = results[0]
                best_ap = results[1]
                best_conf = confidence
        if np.round(confidence, 3) == model.params['specific_conf']:
            specific_conf_map = results[0]
            specific_conf_ap = results[1]
    model.train()
    if train:
        return best_map, best_ap, best_conf, specific_conf_map,\
            specific_conf_ap
    else:
        return best_map, best_ap, best_conf, specific_conf_map,\
            specific_conf_ap, map_frame
def detection(cfg_path, det, label_csv_mame, params):
    """Run batched YOLO v3 detection over a CSV of images and save results.

    Builds the model from ``cfg_path``, runs detection over the images in
    ``label_csv_mame``, rescales the predicted boxes from the letterboxed
    network coordinates back to each original image, draws them with
    ``detection_write`` and writes the annotated images into ``det``.

    Args:
        cfg_path: path to the darknet cfg file describing the network.
        det: output directory for the annotated images.
        label_csv_mame: path to the CSV of image paths consumed by
            ``CustDataCV``.
        params: dict of model/run parameters ('height', 'cuda', 'classes',
            'batch_size' via model.params, ...).

    Returns:
        None; output is written to disk as a side effect.
    """
    # yolo v3 down size the imput images 32 strides, therefore the input
    # needs to be a multiplier of 32 and > 32
    assert params["height"] % 32 == 0
    assert params["height"] > 32
    blocks = parse_cfg(cfg_path)
    model = yolo_v3(params, blocks)
    # model.load_weights("../4Weights/yolov3.weights")
    if params['cuda']:
        model = model.cuda()
    # inference only: freeze all weights so no gradients are tracked
    for parameter in model.parameters():
        parameter.requires_grad = False
    # put to evaluation mode so that gradient wont be calculated
    model.eval()
    transform = transforms.Compose([
        LetterBoxImage_cv([params['height'], params['height']]),
        ImgToTensorCv(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
    data = CustDataCV(label_csv_mame, transform=transform,
                      detection_phase=True)
    data_loader = DataLoader(data, shuffle=False,
                             batch_size=model.params['batch_size'],
                             collate_fn=my_collate_detection,
                             num_workers=0, worker_init_fn=worker_init_fn)
    for original_imgs, images, imlist, im_dim_list, labels in data_loader:
        # one (w, h, w, h) row per image, for rescaling both box corners
        im_dim_list = torch.FloatTensor(im_dim_list).repeat(1, 2)
        if model.params['cuda']:
            images = images.cuda()
            im_dim_list = im_dim_list.cuda()
        prediction = model(images)
        # FIX: removed the no-op statement `model.params['num_classes']`
        # that evaluated the value and discarded it.
        output = filter_results(prediction, params)
        # filter_results returns an int when the batch has no detections
        if type(output) != int:
            # select each detection's source-image dimensions by the image
            # index stored in output[:, 0]
            im_dim_list = torch.index_select(im_dim_list, 0,
                                             output[:, 0].long())
            scaling_factor = torch.min(params["height"] / im_dim_list,
                                       1)[0].view(-1, 1)
            # Clamp all elements in input into the range [ min, max ] and
            # return a resulting tensor:
            # it is to make sure the min max of width and height are not
            # larger or smaller than the image boundary
            output[:, [1, 3]] -= (params["height"] - scaling_factor *
                                  im_dim_list[:, 0].view(-1, 1)) / 2
            output[:, [2, 4]] -= (params["height"] - scaling_factor *
                                  im_dim_list[:, 1].view(-1, 1)) / 2
            output[:, 1:5] /= scaling_factor
            for i in range(output.shape[0]):
                output[i, [1, 3]] = torch.clamp(output[i, [1, 3]], 0.0,
                                                im_dim_list[i, 0])
                output[i, [2, 4]] = torch.clamp(output[i, [2, 4]], 0.0,
                                                im_dim_list[i, 1])
            # draw each detection onto its original image in place
            list(
                map(
                    lambda x: detection_write(x, original_imgs, params[
                        "classes"]), output))
            det_names = pd.Series(imlist).apply(
                lambda x: "{}/{}".format(det, x.split("/")[-1]))
            # NOTE(review): images are only written when the batch produced
            # detections -- confirm whether detection-free images should be
            # saved unannotated as well.
            list(map(cv2.imwrite, det_names, original_imgs))
        if model.params['cuda']:
            torch.cuda.empty_cache()