def run_nms(test_images_dict: dict) -> dict:
    """
    Runs non-maximum suppression on the predicted boxes/scores for each
    class-category, (1) for each image and (2) for each image's windows,
    and stores the results back in test_images_dict.

    This prunes boxes of the same class-category that overlap considerably,
    favoring the higher-scoring boxes.
    """
    # run non-maximum suppression per image
    for image_id, image_info in test_images_dict.items():
        image_indices = []  # the total list of indices to keep
        # for each class-category, run NMS and add the surviving indices to the total list
        for (predicted_boxes, predicted_scores) in get_same_class_annotations(image_info):
            image_class_indices = nms.boxes(predicted_boxes, predicted_scores)
            # filter out placeholders (zero-score slots belonging to other classes)
            image_class_indices = [index for index in image_class_indices
                                   if predicted_scores[index] != 0.0]
            image_indices.extend(image_class_indices)

        # only keep the surviving annotations (a set makes the membership test O(1))
        image_indices = set(image_indices)
        test_images_dict[image_id]['predicted_boxes'] = \
            [box for index, box in enumerate(image_info['predicted_boxes'])
             if index in image_indices]
        test_images_dict[image_id]['predicted_classes'] = \
            [category for index, category in enumerate(image_info['predicted_classes'])
             if index in image_indices]
        test_images_dict[image_id]['predicted_scores'] = \
            [score for index, score in enumerate(image_info['predicted_scores'])
             if index in image_indices]

        # run non-maximum suppression per window
        for window_id, window_info in image_info['windows'].items():
            window_indices = []  # the total list of indices to keep
            # for each class-category, run NMS and add the surviving indices to the total list
            for (predicted_boxes, predicted_scores) in get_same_class_annotations(window_info):
                window_class_indices = nms.boxes(predicted_boxes, predicted_scores)
                # filter out placeholders
                window_class_indices = [index for index in window_class_indices
                                        if predicted_scores[index] != 0.0]
                window_indices.extend(window_class_indices)

            window_indices = set(window_indices)
            test_images_dict[image_id]['windows'][window_id]['predicted_boxes'] = \
                [box for index, box in enumerate(window_info['predicted_boxes'])
                 if index in window_indices]
            test_images_dict[image_id]['windows'][window_id]['predicted_classes'] = \
                [category for index, category in enumerate(window_info['predicted_classes'])
                 if index in window_indices]
            test_images_dict[image_id]['windows'][window_id]['predicted_scores'] = \
                [score for index, score in enumerate(window_info['predicted_scores'])
                 if index in window_indices]

    return test_images_dict
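# ----------------------------------------------------------------------------
# A minimal, self-contained sketch of the per-class NMS pattern run_nms()
# implements above, assuming the `nms` package is installed; the boxes,
# scores, and class labels below are made-up illustration data.
from nms import nms

_boxes = [(10, 10, 50, 50), (12, 12, 50, 50), (200, 200, 40, 40)]  # (x, y, w, h)
_scores = [0.9, 0.8, 0.7]
_classes = ["car", "car", "dog"]

_keep = []
for _cls in set(_classes):
    _idxs = [i for i, c in enumerate(_classes) if c == _cls]
    # NMS within one class-category; the returned indices are per-class,
    # so map them back to the global index space before collecting them
    _cls_keep = nms.boxes([_boxes[i] for i in _idxs], [_scores[i] for i in _idxs])
    _keep.extend(_idxs[k] for k in _cls_keep)

print(sorted(_keep))  # e.g. [0, 2]: the overlapping lower-scoring car box is pruned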
def doubleDetect(darknet_image, darknet_image_long, frame_rgb):
    global netMain, metaMain
    detections = []
    nmsdet = []

    # Sequential processing: run both detection passes
    # (both passes must feed `detections` so NMS sees all boxes;
    # appending the first pass to `nmsdet` would let it bypass NMS)
    detections += detect(netMain, metaMain, darknet_image, 0)
    detections += detect(netMain, metaMain, darknet_image_long, 1)

    # convert detections to numpy array
    if len(detections):
        dets = np.array(detections)
        # run non-maximum suppression to eliminate overlapping bounding boxes
        # returns indexes
        idx = nms.boxes(dets[:, :4], dets[:, -1:], nms_algorithm=fast.nms)
        # get best bounding boxes from nms
        for i in idx:
            nmsdet.append(detections[i])

    # draw all bounding boxes
    image = cvDrawBoxes(nmsdet, img=frame_rgb, color="g")
    # image = cvDrawBoxes(detections, img=frame_rgb, color="r")
    return image
def get_boxes(self, image_path):
    image = cv2.imread(image_path)
    (origHeight, origWidth) = image.shape[:2]

    # set the new width and height and then determine the ratio in change
    # for both the width and height
    (newW, newH) = (self.width, self.height)
    ratioWidth = origWidth / float(newW)
    ratioHeight = origHeight / float(newH)

    # resize the image and grab the new image dimensions
    image = cv2.resize(image, (newW, newH))
    (imageHeight, imageWidth) = image.shape[:2]

    blob = cv2.dnn.blobFromImage(image, 1.0, (imageWidth, imageHeight),
                                 (123.68, 116.78, 103.94), swapRB=True, crop=False)

    start = time.time()
    self.net.setInput(blob)
    (scores, geometry) = self.model_downloader.forward(self.net)
    end = time.time()
    print(f"time for forward pass: {end - start}")

    (boxes, confidences, baggage) = decode(scores, geometry, self.min_confidence)

    functions = [nms.felzenszwalb.nms, nms.fast.nms, nms.malisiewicz.nms]
    indices = nms.boxes(boxes, confidences,
                        nms_function=functions[0],
                        confidence_threshold=self.min_confidence,
                        nsm_threshold=self.min_nms_treshold)
    return [scale_box(boxes[i], ratioWidth, ratioHeight) for i in indices]
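# A plausible sketch of the scale_box helper assumed above (its definition is
# not shown in the snippet): it maps an (x, y, w, h) box found on the resized
# image back into original-image coordinates.
def scale_box(box, ratio_width, ratio_height):
    (x, y, w, h) = box
    return (int(x * ratio_width), int(y * ratio_height),
            int(w * ratio_width), int(h * ratio_height))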
def get_predictions(image):
    original_shape = image.shape
    image_mask_pred = cv2.resize(image, img_size)
    display.show(image_mask_pred, "image")
    image_mask_pred = np.expand_dims(image_mask_pred, axis=0) / 255.
    pred_masks = mask_cnn.predict(image_mask_pred, batch_size=8)

    boxes, box_scores = get_boxes(pred_masks[0:1], num_boxes, box_threshold)
    boxes = rescale_coords(boxes, original_shape, img_size)
    box_image = draw_boxes(image, boxes)
    display.show(box_image, "boxes")

    char_images = get_chars_from_image(image, boxes)
    char_prediction_scores = []
    if len(boxes) != 0:
        char_prediction_scores = char_cnn.predict(np.asarray(char_images))
        # display.show(join_images(char_images), "chars")

    cindices = [np.argmax(score) for score in char_prediction_scores]
    char_scores = [score[i] for i, score in zip(cindices, char_prediction_scores)]
    chars = [index2char[cindex] for cindex in cindices]

    lists = [chars, boxes, char_scores, char_images]
    chars, boxes, char_scores, char_images = filter_by_threshold(10.0 / n_chars, char_scores, lists)
    # rebuild `lists` so the NMS indices (computed on the filtered boxes)
    # line up with the lists they are applied to
    lists = [chars, boxes, char_scores, char_images]
    nms_indices = nms.boxes(boxes, char_scores)
    chars, boxes, char_scores, char_images = filter_by_indices(nms_indices, lists)
    display.show(box_image, "missing boxes")
    return chars, boxes, char_scores, char_images
def _analyze(self, img=None, *args, **kwargs):
    assert img is not None, ValueError('img can not be None')
    img = F.to_tensor(img)
    output = self.model([img])[0]
    for key, item in output.items():
        if self.device == 'cuda':
            item = item.cpu()
        output[key] = item.detach().numpy()

    # nms.boxes expects (x, y, w, h) rects, so convert from corner coordinates
    boxes = [[x1, y1, x2 - x1, y2 - y1] for x1, y1, x2, y2 in output['boxes']]
    scores = output['scores']
    rects = nms.boxes(rects=boxes, scores=scores, nms_threshold=0.25)

    output['boxes'] = [
        output['boxes'][id] for id in rects if output['scores'][id] > 0.5
    ]
    output['labels'] = [
        output['labels'][id] for id in rects if output['scores'][id] > 0.5
    ]
    output['scores'] = [
        output['scores'][id] for id in rects if output['scores'][id] > 0.5
    ]
    return output
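# The corner-to-top-left conversion above is the standard adapter between
# torchvision-style (x1, y1, x2, y2) outputs and the (x, y, w, h) rects that
# nms.boxes expects; a minimal sketch with made-up detector output:
from nms import nms

_output = {"boxes": [[10, 10, 60, 60], [12, 12, 62, 62]], "scores": [0.9, 0.4]}
_rects = [[x1, y1, x2 - x1, y2 - y1] for x1, y1, x2, y2 in _output["boxes"]]
_keep = nms.boxes(_rects, _output["scores"], nms_threshold=0.25)
print(_keep)  # indices into the original output lists, e.g. [0]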
def get_boxes(score, geo, score_thresh=0.9, nms_thresh=0.2):
    '''get boxes from feature map
    Input:
        score       : score map from model <numpy.ndarray, (1,row,col)>
        geo         : geo map from model <numpy.ndarray, (5,row,col)>
        score_thresh: threshold to segment score map
        nms_thresh  : threshold in nms
    Output:
        boxes       : final polys <numpy.ndarray, (n,9)>
    '''
    score = score[0, :, :]
    xy_text = np.argwhere(score > score_thresh)  # n x 2, format is [r, c]
    if xy_text.size == 0:
        return None

    xy_text = xy_text[np.argsort(xy_text[:, 0])]
    valid_pos = xy_text[:, ::-1].copy()  # n x 2, [x, y]
    valid_geo = geo[:, xy_text[:, 0], xy_text[:, 1]]  # 5 x n
    polys_restored, index = restore_polys(valid_pos, valid_geo, score.shape)
    if polys_restored.size == 0:
        return None

    boxes = np.zeros((polys_restored.shape[0], 9), dtype=np.float32)
    boxes[:, :8] = polys_restored
    boxes[:, 8] = score[xy_text[index, 0], xy_text[index, 1]]

    rect = [[boxes[i, 0], boxes[i, 1],
             abs(boxes[i, 2] - boxes[i, 0]),
             abs(boxes[i, 3] - boxes[i, 1])] for i in range(boxes.shape[0])]
    score = boxes[:, 8]
    ind = fast.boxes(rect, score)
    to_return = [boxes[i] for i in ind]
    return np.array(to_return)
def non_maximum_suppression(json_file, save_json_file):
    from nms import nms

    data = json.load(open(json_file, "r"))
    annotations = dict()
    for annotation in data['annotations']:
        if annotation['image_id'] in annotations:
            annotations[annotation['image_id']] += [annotation]
        else:
            annotations[annotation['image_id']] = [annotation]

    image_ids = annotations.keys()
    new_annotations = list()
    for image_id in image_ids:
        boxes = [[int(x) for x in box["bbox"]] for box in annotations[image_id]]
        scores = [box["score"] for box in annotations[image_id]]
        indices = nms.boxes(boxes, scores)
        new_annotations += [annotations[image_id][i] for i in indices]

    new_data = dict()
    new_data["images"] = data["images"]
    new_data["categories"] = data["categories"]
    new_data["annotations"] = new_annotations
    with open(save_json_file, "w") as outfile:
        json.dump(new_data, outfile)
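# Hypothetical usage of the COCO-style helper above; the file names are
# placeholders, and the input file is expected to contain "images",
# "categories", and "annotations" entries with per-annotation "bbox"
# ((x, y, w, h) format) and "score" fields.
non_maximum_suppression("detections.json", "detections_nms.json")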
def run_inference(net, img):
    # run inference on an image - eventually I will have to change
    # print("running inference")
    show = False
    vehicle_classes = ['car', 'vehicle', 'truck', 'bus', 'bike', 'motorbike']

    # not sure what this does
    img2 = Image(img)

    # running inference:
    start = timer()
    results = net.detect(img2)
    end = timer()
    results_labels = [x[0].decode("utf-8") for x in results]

    # finding vehicles
    vehicle_boxes = np.empty((0, 4), int)

    # running non-maximum suppression
    scores = [score for cat, score, bound in results]
    bounds = [bound for cat, score, bound in results]
    indices = boxes(bounds, scores)
    results = [results[ind] for ind in indices]

    # box buffer
    buffer = 10
    for cat, score, bounds in results:
        x, y, w, h = bounds  # x, y are the box centre
        if cat.decode("utf-8") in vehicle_classes:
            # np.append needs axis=0 to keep the (n, 4) shape
            vehicle_boxes = np.append(vehicle_boxes,
                                      np.array([[int(x - w / 2) - buffer,
                                                 int(y - h / 2) - buffer,
                                                 int(w) + buffer,
                                                 int(h) + buffer]]),
                                      axis=0)
            # plotting results if you so wish
            if show:
                cv2.rectangle(img, (int(x - w / 2), int(y - h / 2)),
                              (int(x + w / 2), int(y + h / 2)), (0, 0, 255), 5)
                # cv2.putText(img, str(cat.decode("utf-8")), (int(x), int(y)),
                #             cv2.FONT_HERSHEY_COMPLEX, 1, (255, 255, 0))

    if show:
        cv2.imshow("output", img)
        cv2.waitKey(0)

    # num_vehicles = int(len(vehicle_boxes) / 4)
    # print("detection took {:f} seconds, {:d} vehicles found".format(end - start, num_vehicles))
    return vehicle_boxes
def count_doggo(img_path: str) -> int:
    face_cascade = cv2.CascadeClassifier(r'..\data\catface.xml')
    img = cv2.imread(img_path)
    grayscale = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    objs, rej, scores = face_cascade.detectMultiScale3(grayscale, outputRejectLevels=True)
    # nms.boxes expects (x, y, w, h) rects, which is what detectMultiScale3
    # already returns, so pass the boxes through unchanged
    indices = boxes([[x, y, w, h] for (x, y, w, h) in objs], scores)
    detections = [objs[i] for i in indices]
    scores = [scores[i] for i in indices]

    for i, (x, y, w, h) in enumerate(detections):
        cv2.rectangle(img, (x, y), (x + w, y + h), (0, 0, 255), 10)
        cv2.putText(img, f"{scores[i]}", (x, y - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 2, (36, 255, 12), 4)

    cv_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    plt.imshow(cv_rgb)
    return len(detections)
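# The (x, y, w, h) convention matters: nms.boxes treats the third and fourth
# values as width and height, so passing corner coordinates silently changes
# which boxes are considered overlapping. A quick illustration with toy data:
from nms import nms

_overlapping = [(0, 0, 10, 10), (1, 1, 10, 10)]  # heavy overlap as (x, y, w, h)
print(nms.boxes(_overlapping, [0.9, 0.8]))  # keeps only the higher-scoring box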
def testing_images(img, file, output_clfs, output_alphas):
    classification = list()
    img1 = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    height, width = img1.shape
    box_height = 100
    box_width = 100

    # sliding window over the image at increasing window sizes
    while (box_height < height / 2 and box_width < width / 2):
        for x in range(0, height, 20):
            for y in range(0, width, 20):
                block = img1[x:x + box_height, y:y + box_width]
                window = cv2.resize(block, (19, 19))
                flag, total = detect_face(window, output_clfs, output_alphas)
                if flag == 1:
                    classification.append([(y, x),
                                           (y + block.shape[1], x + block.shape[0]),
                                           total])
        box_height += 20
        box_width += 20

    coord = list()
    scores = list()
    for res in classification:
        # nms.boxes expects (x, y, w, h), so convert the corner pair to width/height
        coord.append((res[0][0], res[0][1],
                      res[1][0] - res[0][0], res[1][1] - res[0][1]))
        scores.append(res[2])
    output = nms.boxes(coord, scores)
    output = output[:2]

    for x in output:
        if classification[x][2] > 205:
            json_dict = dict()
            cv2.rectangle(img, classification[x][0], classification[x][1],
                          (255, 255, 0), 2)
            print(file)
            json_dict['iname'] = str(file)
            width = classification[x][1][0] - classification[x][0][0]
            height = classification[x][1][1] - classification[x][0][1]
            json_dict['bbox'] = [
                classification[x][0][0], classification[x][0][1], width, height
            ]
            # print(json_dict)
            json_list.append(json_dict)
    cv2.imwrite("output/" + file, img)
def doubleDetect(darknet_image, darknet_image_crop, frame_rgb):
    global netMain, metaMain
    detections = []
    nmsdet = []

    # Sequential processing
    detections += detect_sequential(netMain, metaMain, darknet_image, 0)
    detections += detect_sequential(netMain, metaMain, darknet_image_crop, 1)

    dets = np.array(detections)
    if len(dets) > 0:
        boxes = dets[:, :4]
        scores = dets[:, -1:]
        idx = nms.boxes(boxes, scores, nms_algorithm=nms.fast.nms)
        for i in idx:
            nmsdet.append(detections[i])
    # print("detection done in:", round(finish - start, 3), "s")

    # Multiprocessing
    # q = Queue()
    # processes = []
    # p1 = Process(target=detect, args=(q, netMain, metaMain, darknet_image, 0))
    # p2 = Process(target=detect, args=(q, netMain, metaMain, darknet_image_crop, 1))
    # processes.append(p1)
    # processes.append(p2)
    # start = time.perf_counter()
    # for process in processes:
    #     process.start()
    # for process in processes:
    #     process.join()
    # finish = time.perf_counter()
    # print(round(finish - start, 2))
    # while not q.empty():
    #     detections += (q.get())

    # image = cvDrawBoxes(dets, img=frame_rgb, color="r")
    image = cvDrawBoxes(nmsdet, img=frame_rgb, color="g")
    return image, dets
out = net(img)
boxes, scores = detector.forward(out, priors)
boxes = (boxes[0] * scale).cpu().numpy()
scores = scores[0].cpu().numpy()

allboxes = []
# j is class index
for j in range(1, cfg.model.m2det_config.num_classes):
    inds = np.where(scores[:, j] > cfg.test_cfg.score_threshold)[0]
    if len(inds) == 0:
        continue
    c_bboxes = boxes[inds]
    c_scores = scores[inds, j]
    c_dets = np.hstack((c_bboxes, c_scores[:, np.newaxis])).astype(np.float32, copy=False)
    keep_idx = nms.boxes(c_bboxes, c_scores)
    keep_idx = keep_idx[:cfg.test_cfg.keep_per_class]
    c_dets = c_dets[keep_idx, :]
    allboxes.extend([c.tolist() + [j] for c in c_dets])

loop_time = time.time() - loop_start
allboxes = np.array(allboxes)
boxes = allboxes[:, :4]
scores = allboxes[:, 4]
cls_inds = allboxes[:, 5]
print('\n'.join(['pos:{}, ids:{}, score:{:.3f}'.format(
    '(%.1f,%.1f,%.1f,%.1f)' % (o[0], o[1], o[2], o[3]),
    labels[int(oo)], ooo) for o, oo, ooo in zip(boxes, cls_inds, scores)]))
fps = 1.0 / float(loop_time) if cam >= 0 or video else -1
im2show = draw_detection(image, boxes, scores, cls_inds, fps)
def detect(self, conf_preds, loc_data, prior_data, mask_data, img_w, img_h):
    """
    Perform nms for only the max scoring class that isn't background (class 0)
    """
    cur_scores = conf_preds[:, 1:]
    num_class = cur_scores.shape[1]
    classes = np.argmax(cur_scores, axis=1)
    conf_scores = cur_scores[range(cur_scores.shape[0]), classes]

    # filter by confidence_threshold
    keep = conf_scores > self.confidence_threshold
    conf_scores = conf_scores[keep]
    classes = classes[keep]
    loc_data = loc_data[keep, :]
    prior_data = prior_data[keep, :]
    masks = mask_data[keep, :]

    # decode x, y, w, h
    boxes = self.decode(loc_data, prior_data, img_w, img_h)

    # nms for every class
    boxes_result = []
    masks_result = []
    classes_result = []
    conf_scores_result = []
    for i in range(num_class):
        # np.where returns a tuple of index arrays, so take the array itself
        where = np.where(classes == i)[0]
        if len(where) == 0:
            continue
        boxes_tmp = boxes[where]
        masks_tmp = masks[where]
        classes_tmp = classes[where]
        conf_scores_tmp = conf_scores[where]
        indexes = nms.boxes(
            boxes_tmp,
            conf_scores_tmp,
            nms_threshold=self.nms_threshold,
            score_threshold=self.confidence_threshold,
            top_k=self.keep_top_k,
        )
        for index in indexes:
            boxes_result.append(boxes_tmp[index])
            masks_result.append(masks_tmp[index])
            classes_result.append(classes_tmp[index] + 1)
            conf_scores_result.append(conf_scores_tmp[index])

    # keep top k (sort descending so the highest-scoring detections survive;
    # the results are plain lists, so index them with comprehensions)
    if len(conf_scores_result) > self.keep_top_k:
        indexes = np.argsort(conf_scores_result)[::-1][:self.keep_top_k]
        boxes_result = [boxes_result[i] for i in indexes]
        masks_result = [masks_result[i] for i in indexes]
        classes_result = [classes_result[i] for i in indexes]
        conf_scores_result = [conf_scores_result[i] for i in indexes]

    return (
        np.array(boxes_result),
        np.array(masks_result),
        np.array(classes_result),
        np.array(conf_scores_result),
    )
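# Why the `[0]` in np.where above matters: with a single condition, np.where
# returns a tuple of index arrays, so len() of the tuple is the number of
# dimensions (always 1 here), not the number of matches.
import numpy as np

_classes = np.array([0, 1, 1, 2])
_where = np.where(_classes == 1)
print(len(_where))     # 1, regardless of how many elements match
print(len(_where[0]))  # 2, the actual number of matches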
def text_detection(image, east, min_confidence, width, height):
    # load the input image and grab the image dimensions
    image = cv2.imread(image)
    orig = image.copy()
    (origHeight, origWidth) = image.shape[:2]

    # set the new width and height and then determine the ratio in change
    # for both the width and height
    (newW, newH) = (width, height)
    ratioWidth = origWidth / float(newW)
    ratioHeight = origHeight / float(newH)

    # resize the image and grab the new image dimensions
    image = cv2.resize(image, (newW, newH))
    (imageHeight, imageWidth) = image.shape[:2]

    # define the two output layer names for the EAST detector model that
    # we are interested in -- the first is the output probabilities and the
    # second can be used to derive the bounding box coordinates of text
    layerNames = ["feature_fusion/Conv_7/Sigmoid", "feature_fusion/concat_3"]

    # load the pre-trained EAST text detector
    print("[INFO] loading EAST text detector...")
    net = cv2.dnn.readNet(east)

    # construct a blob from the image and then perform a forward pass of
    # the model to obtain the two output layer sets
    blob = cv2.dnn.blobFromImage(image, 1.0, (imageWidth, imageHeight),
                                 (123.68, 116.78, 103.94), swapRB=True, crop=False)
    start = time.time()
    net.setInput(blob)
    (scores, geometry) = net.forward(layerNames)
    end = time.time()

    # show timing information on text prediction
    print("[INFO] text detection took {:.6f} seconds".format(end - start))

    # NMS on the unrotated rects
    confidenceThreshold = min_confidence
    nmsThreshold = 0.4

    # decode the blob info
    (rects, confidences, baggage) = decode(scores, geometry, confidenceThreshold)

    offsets = []
    thetas = []
    for b in baggage:
        offsets.append(b['offset'])
        thetas.append(b['angle'])

    ##########################################################

    functions = [nms.felzenszwalb.nms, nms.fast.nms, nms.malisiewicz.nms]

    print("[INFO] Running nms.boxes . . .")

    for i, function in enumerate(functions):
        start = time.time()
        indices = nms.boxes(rects, confidences,
                            nms_function=function,
                            confidence_threshold=confidenceThreshold,
                            nsm_threshold=nmsThreshold)
        end = time.time()

        indices = np.array(indices).reshape(-1)
        drawrects = np.array(rects)[indices]
        name = function.__module__.split('.')[-1].title()
        print("[INFO] {} NMS took {:.6f} seconds and found {} boxes".format(
            name, end - start, len(drawrects)))

        drawOn = orig.copy()
        drawBoxes(drawOn, drawrects, ratioWidth, ratioHeight, (0, 255, 0), 2)
        title = "nms.boxes {}".format(name)
        cv2.imshow(title, drawOn)
        cv2.moveWindow(title, 150 + i * 300, 150)
        # cv2.waitKey(0)

    # convert rects to polys
    polygons = utils.rects2polys(rects, thetas, offsets, ratioWidth, ratioHeight)

    print("[INFO] Running nms.polygons . . .")

    for i, function in enumerate(functions):
        start = time.time()
        indices = nms.polygons(polygons, confidences,
                               nms_function=function,
                               confidence_threshold=confidenceThreshold,
                               nsm_threshold=nmsThreshold)
        end = time.time()

        indices = np.array(indices).reshape(-1)
        drawpolys = np.array(polygons)[indices]
        name = function.__module__.split('.')[-1].title()
        print("[INFO] {} NMS took {:.6f} seconds and found {} boxes".format(
            name, end - start, len(drawpolys)))

        drawOn = orig.copy()
        drawPolygons(drawOn, drawpolys, ratioWidth, ratioHeight, (0, 255, 0), 2)
        title = "nms.polygons {}".format(name)
        cv2.imshow(title, drawOn)
        cv2.moveWindow(title, 150 + i * 300, 150)

    cv2.waitKey(0)
def main():
    # Training commented out:
    ##################################################################
    # clf, alpha = train(images, 2429, 4548)
    # print(clf)
    # print(alpha)
    # final = zip(clf, alpha)
    # pickle.dump(final, open('./final_classifier.pickle', 'wb'), protocol=2)
    #########################################

    args = parse_args()
    print("loading images...")
    imagePaths = os.listdir(args.dir_path)

    with open(r"./final_classifier.pickle", "rb") as f:
        file = pickle.load(f)
    clf = []
    alpha = []
    for i, j in file:
        # print(i, j)
        clf.append(i)
        alpha.append(j)

    # Weak Classifier List
    # clf = [([[(5, 18, 17, 3)], [(8, 18, 17, 3)]], -1050.0, 1), ([[(5, 13, 9, 6)], [(11, 13, 9, 6)]], -216.0, -1), ([[(5, 13, 10, 6)], [(11, 13, 10, 6)]], -365.0, 1), ([[(4, 13, 11, 5)], [(9, 13, 11, 5)]], -312.0, -1), ([[(8, 13, 10, 6)], [(14, 13, 10, 6)]], -367.0, -1), ([[(5, 14, 13, 5)], [(10, 14, 13, 5)]], -1042.0, 1), ([[(7, 17, 17, 3)], [(10, 17, 17, 3)]], -49.0, -1), ([[(7, 17, 11, 3)], [(10, 17, 11, 3)]], -127.0, 1), ([[(7, 16, 10, 3)], [(10, 16, 10, 3)]], -74.0, -1), ([[(7, 9, 4, 2)], [(9, 9, 4, 2)]], -21.0, 1)]
    # Alpha List
    # alpha = [6.575151471691274, 7.266783069631535, 9.342094529936636, 12.262314436203779, 9.807209897582606, 11.805431834748102, 11.677685482457234, 10.758513212206026, 14.769595937010674, 10.26293208110991]

    json_list = []
    print(len(imagePaths))
    for imagePath in imagePaths:
        print(imagePath)
        image = cv2.imread(os.path.join(args.dir_path, imagePath), 0)
        ogimg = cv2.imread(os.path.join(args.dir_path, imagePath))
        # ogimg = cv2.imread(os.path.join(path, imagePath))
        img = image.copy()

        # slide a 19x19 window over the image and score each patch
        faces = list()
        scores = list()
        for i in range(0, len(ogimg), 19):
            for j in range(0, len(ogimg[0]), 19):
                t_img = img[i:i + 19, j:j + 19]
                if t_img.shape[0] != 19 or t_img.shape[1] != 19:
                    t_img = cv2.resize(t_img, (19, 19))
                t, res = strong_classifier(t_img, alpha, clf)
                if res == 1:
                    faces.append((j, i, j + 60, i + 60))
                    scores.append(t)
        # print(len(faces))

        # sort detections by score, highest first
        z = sorted(zip(faces, scores), key=lambda x: x[1], reverse=True)
        faces = [i[0] for i in z]
        scores = [i[1] for i in z]
        # print(scores)
        indices = nms.boxes(faces, scores)
        # print(len(indices))
        # print(indices)

        final_faces = []
        final_scores = []
        for c, i in enumerate(indices[:4]):
            face = faces[i]
            final_faces.append([face[0], face[1], face[2] + 200, face[3] + 200])
            final_scores.append(scores[i])
            # print(faces[i], scores[i])

        index = nms.boxes(final_faces, final_scores)
        # print(index)
        for i in index:
            face = final_faces[i]
            if (face[0] < ogimg.shape[1] and face[2] + 50 < ogimg.shape[1]
                    and face[1] < ogimg.shape[0] and face[3] + 50 < ogimg.shape[0]):
                cv2.rectangle(ogimg, (face[0], face[1]),
                              (face[2] - 100, face[3] - 100), (0, 255, 255), 2)
                element = {
                    "iname": imagePath,
                    "bbox": [face[0], face[1], face[2] - 100, face[3] - 100]
                }
                json_list.append(element)
        print(len(json_list))
        # cv2.imwrite(os.path.join(respath, imagePath), ogimg)
        # print(json_list)

    # the result json file name
    output_json = "results.json"
    # dump json_list to results.json
    with open(output_json, 'w') as f:
        json.dump(json_list, f)
for frame in range(num_frames):
    predicted_bboxes_frame = []
    predicted_scores = []
    for j in range(descriptors.shape[1]):
        if predictions[frame, j] > 0.5:
            center_x, center_y, w, h, p, _ = descriptors[frame, j]
            center_x *= width
            w *= width
            center_y *= height
            h *= height
            x = center_x - w / 2.
            y = center_y - h / 2.
            predicted_scores.append(predictions[frame, j])
            predicted_bboxes_frame.append([x, y, x + w, y + h])
    indices = nms.boxes(predicted_bboxes_frame, predicted_scores)  # non-maximum suppression
    for index in indices:
        predicted_bboxes_word[frame].append(predicted_bboxes_frame[index])

tracked_predictions_word = assign_ids(predicted_bboxes_word)
for i, tracked_predictions_frame in enumerate(tracked_predictions_word):
    for tracked_prediction in tracked_predictions_frame:
        predicted_bboxes[i].append(list(tracked_prediction[0:4]))
        predicted_ids[i].append(int(tracked_prediction[4]))

get_metrics(predicted_bboxes, predicted_ids, annotations_path, acc)
mh = mm.metrics.create()
try:
    summary = mh.compute_many(
        [acc],
offsets = []
thetas = []
for b in baggage:
    offsets.append(b['offset'])
    thetas.append(b['angle'])

##########################################################

functions = [nms.felzenszwalb.nms, nms.fast.nms, nms.malisiewicz.nms]

print("[INFO] Running nms.boxes . . .")

for i, function in enumerate(functions):
    start = time.time()
    indices = nms.boxes(rects, confidences,
                        nms_function=function,
                        confidence_threshold=confidenceThreshold,
                        nsm_threshold=nmsThreshold)
    end = time.time()

    indices = np.array(indices).reshape(-1)
    drawrects = np.array(rects)[indices]
    name = function.__module__.split('.')[-1].title()
    print("[INFO] {} NMS took {:.6f} seconds and found {} boxes".format(
        name, end - start, len(drawrects)))

    drawOn = orig.copy()
    drawBoxes(drawOn, drawrects, ratioWidth, ratioHeight, (0, 255, 0), 2)

    ############## My additions ################
    print(drawrects, indices, ratioHeight, ratioWidth)
    # drawrects contains the coordinates of the detected text boxes in (x, y, w, h) format
scores = []
for i in range(descriptors.shape[1]):
    if predictions[0, frame + 1, i] > 0.25:
        center_x, center_y, w, h, _, _ = descriptors[frame + 1, i]
        center_x *= inp.shape[1]
        w *= inp.shape[1]
        center_y *= inp.shape[0]
        h *= inp.shape[0]
        x = center_x - w / 2.
        y = center_y - h / 2.
        scores.append(predictions[0, frame + 1, i])
        rectongles.append(np.array([x, y, x + w, y + h]))  # sort requires x1, y1, x2, y2, score=1???

indices = nms.boxes(rectongles, scores)  # non-maximum suppression
for index in indices:
    detections_word[frame].append(rectongles[index])
frame += 1

tracked_detections_word = assign_ids(detections_word)  # there might be id collisions
for i, tracked_detections_frame in enumerate(tracked_detections_word):
    for tracked_detection in tracked_detections_frame:
        tracked_detections[i].append([tracked_detection, word])

cap.set(cv2.CAP_PROP_POS_FRAMES, 1)
fig, ax = plt.subplots(1, figsize=(15, 15))
ret, inp = cap.read()
frame = 0
def cli(ctx, sink, opt_disk, opt_net, opt_conf_thresh, opt_nms_thresh):
    """Generates scene text ROIs (CV DNN)"""

    # ----------------------------------------------------------------
    # imports

    import os
    from os.path import join
    from pathlib import Path

    import click
    import cv2 as cv
    import numpy as np
    from nms import nms

    from vframe.utils import click_utils, file_utils, im_utils, logger_utils, dnn_utils
    from vframe.utils import scenetext_utils
    from vframe.models.metadata_item import ROIMetadataItem, ROIDetectResult
    from vframe.settings.paths import Paths
    from vframe.models.bbox import BBox

    # ----------------------------------------------------------------
    # init

    log = logger_utils.Logger.getLogger()

    metadata_type = types.Metadata.TEXT_ROI

    # initialize dnn
    if opt_net == types.SceneTextNet.EAST:
        # TODO externalize
        dnn_size = (320, 320)  # fixed
        dnn_mean_clr = (123.68, 116.78, 103.94)  # fixed
        dnn_scale = 1.0  # fixed
        dnn_layer_names = [
            "feature_fusion/Conv_7/Sigmoid",
            "feature_fusion/concat_3"
        ]
        fp_model = join(cfg.DIR_MODELS_TF, 'east', 'frozen_east_text_detection.pb')
        log.debug('fp_model: {}'.format(fp_model))
        net = cv.dnn.readNet(fp_model)
        # net.setPreferableBackend(cv.dnn.DNN_BACKEND_OPENCV)
        # net.setPreferableTarget(cv.dnn.DNN_TARGET_CPU)
    elif opt_net == types.SceneTextNet.DEEPSCENE:
        dnn_size = (320, 320)  # fixed
        fp_model = join(cfg.DIR_MODELS_CAFFE, 'deepscenetext', "TextBoxes_icdar13.caffemodel")
        fp_prototxt = join(cfg.DIR_MODELS_CAFFE, 'deepscenetext', 'textbox.prototxt')
        net = cv.text.TextDetectorCNN_create(fp_prototxt, fp_model)

    # ----------------------------------------------------------------
    # process

    # iterate sink
    while True:
        chair_item = yield
        metadata = {}

        for frame_idx, frame in chair_item.keyframes.items():

            if opt_net == types.SceneTextNet.DEEPSCENE:
                # DeepScene scene text detector (opencv contrib)
                frame = im_utils.resize(frame, width=dnn_size[0], height=dnn_size[1])
                frame_dim = frame.shape[:2][::-1]
                rects, probs = net.detect(frame)
                det_results = []
                for r in range(np.shape(rects)[0]):
                    prob = float(probs[r])
                    if prob > opt_conf_thresh:
                        rect = BBox.from_xywh_dim(*rects[r], frame_dim).as_xyxy()  # normalized
                        det_results.append(ROIDetectResult(prob, rect))
                metadata[frame_idx] = det_results
            elif opt_net == types.SceneTextNet.EAST:
                # EAST scene text detector (compare against opt_net here; the
                # bare enum value is always truthy, so the original elif
                # matched unconditionally)
                frame = im_utils.resize(frame, width=dnn_size[0], height=dnn_size[1])
                frame_dim = dnn_size
                blob = cv.dnn.blobFromImage(frame, dnn_scale, dnn_size, dnn_mean_clr,
                                            swapRB=True, crop=False)
                net.setInput(blob)
                (scores, geometry) = net.forward(dnn_layer_names)
                (rects, confidences, baggage) = scenetext_utils.east_text_decode(
                    scores, geometry, opt_conf_thresh)

                det_results = []
                if rects:
                    offsets = []
                    thetas = []
                    for b in baggage:
                        offsets.append(b['offset'])
                        thetas.append(b['angle'])

                    # functions = [nms.felzenszwalb.nms, nms.fast.nms, nms.malisiewicz.nms]
                    indices = nms.boxes(rects, confidences,
                                        nms_function=nms.fast.nms,
                                        confidence_threshold=opt_conf_thresh,
                                        nsm_threshold=opt_nms_thresh)
                    indices = np.array(indices).reshape(-1)
                    rects = np.array(rects)[indices]
                    scores = np.array(confidences)[indices]
                    for rect, score in zip(rects, scores):
                        rect = BBox.from_xywh_dim(*rect, frame_dim).as_xyxy()  # normalized
                        det_results.append(ROIDetectResult(score, rect))
                metadata[frame_idx] = det_results

        # append metadata to chair_item's mapping item
        chair_item.set_metadata(metadata_type, ROIMetadataItem(metadata))

        # ----------------------------------------------------------------
        # yield back to the processor pipeline

        # send back to generator
        sink.send(chair_item)
cv2.namedWindow(seq_name, cv2.WINDOW_NORMAL)

# init (reset) the identifier
id_num = 0

# tracking process in each frame
for nn, im_path in enumerate(images):
    frame = nn + 1
    img = cv2.imread(im_path)
    print('Frame {} is loaded'.format(frame))

    # load the detection results of this frame
    pre_frame_det_results = det_results[det_results[:, 0] == frame]
    # non-maximum suppression [frame, id, x, y, w, h, score]
    indices = nms.boxes(pre_frame_det_results[:, 2:6],
                        pre_frame_det_results[:, 6])
    frame_det_results = pre_frame_det_results[indices, :]
    # extract the bbox [fr, id, (x, y, w, h), score]
    bboxes = frame_det_results[:, 2:6]

    ############################################
    #  ***multiple tracking and associating*** #
    ############################################
    # 1. sort trackers
    index1, index2 = sort_trackers(trackers)
    # 2. save the processed index of trackers
    index_processed = []
    for k in range(2):
def main():
    np.random.seed(37)

    max_x, max_y = 10000, 10000
    num_boxes = 100000
    #iou_threshold, score_threshold = 0.5, float('-inf')
    iou_threshold, score_threshold = 0.5, 0.001
    max_output_size = num_boxes

    #--------------------
    print('Start generating data...')
    start_time = time.time()
    boxes = list()
    for _ in range(num_boxes):
        xs = np.random.randint(max_x, size=2)
        ys = np.random.randint(max_y, size=2)
        boxes.append([np.min(xs), np.min(ys), np.max(xs), np.max(ys)])
    boxes = np.array(boxes)
    scores = np.random.rand(num_boxes)
    boxes_scores = np.hstack([boxes, scores.reshape([-1, 1])]).astype(np.float32)
    #boxes2 = np.zeros((num_boxes,), dtype=py_parallel_nms_cpu.PyBox)
    #for idx in range(num_boxes):
    #    boxes2[idx] = py_parallel_nms_cpu.PyBox(*boxes_scores[idx, :])
    print('\tElapsed time = {}'.format(time.time() - start_time))
    print('End generating data.')

    """
    #--------------------
    # REF [site] >> https://github.com/rbgirshick/py-faster-rcnn
    # Too slow.
    print('Start py_cpu_nms...')
    start_time = time.time()
    selected_indices = py_cpu_nms.py_cpu_nms(boxes_scores, iou_threshold)
    print('\tElapsed time = {}'.format(time.time() - start_time))
    print('\t#selected boxes =', len(selected_indices))
    #print('\tSelected indices =', selected_indices)
    print('End py_cpu_nms.')

    #--------------------
    # REF [site] >> https://github.com/rbgirshick/py-faster-rcnn
    # Too slow.
    print('Start cpu_nms...')
    start_time = time.time()
    selected_indices = cpu_nms.cpu_nms(boxes_scores, iou_threshold)
    print('\tElapsed time = {}'.format(time.time() - start_time))
    print('\t#selected boxes =', len(selected_indices))
    #print('\tSelected indices =', selected_indices)
    print('End cpu_nms.')
    """

    #--------------------
    # REF [site] >> https://github.com/rbgirshick/py-faster-rcnn
    print('Start gpu_nms...')
    start_time = time.time()
    selected_indices = gpu_nms.gpu_nms(boxes_scores, iou_threshold)
    print('\tElapsed time = {}'.format(time.time() - start_time))
    print('\t#selected boxes =', len(selected_indices))
    #print('\tSelected indices =', selected_indices)
    print('End gpu_nms.')

    """
    #--------------------
    # REF [site] >> https://github.com/jeetkanjani7/Parallel_NMS
    # Not a good implementation.
    # Too slow.
    print('Start parallel_nms_cpu...')
    start_time = time.time()
    is_kept_list = py_parallel_nms_cpu.py_parallel_nms_cpu(boxes_scores, iou_threshold)
    print('\tElapsed time = {}'.format(time.time() - start_time))
    print('\t#selected boxes =', np.count_nonzero(is_kept_list))
    #print('\tSelected indices =', np.nonzero(is_kept_list))
    print('End parallel_nms_cpu.')

    #--------------------
    # REF [site] >> https://github.com/jeetkanjani7/Parallel_NMS
    # Not a good implementation.
    print('Start parallel_nms_gpu...')
    start_time = time.time()
    is_kept_list = py_parallel_nms_gpu.py_parallel_nms_gpu(boxes_scores, iou_threshold)
    print('\tElapsed time = {}'.format(time.time() - start_time))
    print('\t#selected boxes =', np.count_nonzero(is_kept_list))
    #print('\tSelected indices =', np.nonzero(is_kept_list))
    print('End parallel_nms_gpu.')
    """

    #--------------------
    # REF [site] >>
    #   https://bitbucket.org/tomhoag/nms
    #   https://nms.readthedocs.io/en/latest/index.html
    print('Start nms...')
    nms_function = nms.fast.nms
    #nms_function = nms.felzenszwalb.nms
    #nms_function = nms.malisiewicz.nms
    start_time = time.time()
    selected_indices = nms.boxes(boxes, scores, score_threshold=score_threshold,
                                 nms_threshold=iou_threshold, nms_function=nms_function)
    print('\tElapsed time = {}'.format(time.time() - start_time))
    print('\t#selected boxes =', len(selected_indices))
    #print('\tSelected indices =', selected_indices)
    print('End nms.')

    #--------------------
    # REF [site] >> https://docs.opencv.org/3.4/d6/d0f/group__dnn.html
    print('Start opencv...')
    start_time = time.time()
    selected_indices = cv2.dnn.NMSBoxes(boxes.tolist(), scores,
                                        score_threshold=score_threshold,
                                        nms_threshold=iou_threshold)
    print('\tElapsed time = {}'.format(time.time() - start_time))
    print('\t#selected boxes =', len(selected_indices))
    #print('\tSelected indices =', selected_indices.ravel().tolist())
    print('End opencv.')

    #--------------------
    # REF [site] >> https://www.tensorflow.org/api_docs/python/tf/image/non_max_suppression
    # Slow, not as expected.
    print('Start tf...')
    with tf.Session() as sess:
        # For CUDA context initialization.
        #A = tf.constant([1])
        #A.eval(session=sess)
        #sess.run(tf.global_variables_initializer())

        tf_nms = tf.image.non_max_suppression(boxes, scores, max_output_size,
                                              iou_threshold=iou_threshold,
                                              score_threshold=score_threshold)
        start_time = time.time()
        selected_indices = tf_nms.eval(session=sess)
        print('\tElapsed time = {}'.format(time.time() - start_time))
        print('\t#selected boxes =', len(selected_indices))
        #print('\tSelected indices =', selected_indices)
    print('End tf.')
def detect_text(self, min_confidence=0.01, width=320, height=320):
    """
    Detects text fields using the EAST algorithm and marks the text fields
    it finds. Width and height should be multiples of 32; this is a
    predefined requirement of the OpenCV (dnn) operations.
    """
    (origHeight, origWidth) = self.image.shape[:2]
    (newW, newH) = (width, height)
    ratioWidth = origWidth / float(newW)
    ratioHeight = origHeight / float(newH)

    self.image = cv2.resize(self.image, (newW, newH))
    (imageHeight, imageWidth) = self.image.shape[:2]

    layerNames = [
        "feature_fusion/Conv_7/Sigmoid",
        "feature_fusion/concat_3"
    ]
    try:
        path = os.path.abspath(os.path.dirname(__file__))
        net = cv2.dnn.readNet(path + '/resources/frozen_east_text_detection.pb')
    except FileNotFoundError:
        raise ResourceNotFound(
            """The frozen EAST text detector could not be found in resources.
            To download it into resources:
            >>> from skew_correction.data import download
            >>> download()
            or
            $ python -c 'from skew_correction.data import download; download();'
            """)

    blob = cv2.dnn.blobFromImage(self.image, 1.0, (imageWidth, imageHeight),
                                 (123.68, 116.78, 103.94), swapRB=True, crop=False)
    net.setInput(blob)
    (scores, geometry) = net.forward(layerNames)

    confidenceThreshold = min_confidence
    nmsThreshold = 0.4
    (rects, confidences, baggage) = decode(scores, geometry, confidenceThreshold)

    offsets = []
    thetas = []
    for b in baggage:
        offsets.append(b['offset'])
        thetas.append(b['angle'])

    indices = nms.boxes(rects, confidences,
                        nms_function=nms.malisiewicz.nms,
                        confidence_threshold=confidenceThreshold,
                        nsm_threshold=nmsThreshold)
    indices = np.array(indices).reshape(-1)
    drawrects = np.array(rects)[indices]
    self.draw_boxes(self.drawOn, drawrects, ratioWidth, ratioHeight, (0, 0, 0), 2)
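# Hypothetical usage of detect_text above; the enclosing class name
# (SkewCorrector here) and its constructor signature are assumptions, since
# only the method body is shown in the snippet:
#
#   corrector = SkewCorrector("document.jpg")  # hypothetical constructor
#   corrector.detect_text(min_confidence=0.5, width=320, height=320)
#   # the detected text boxes are drawn onto corrector.drawOn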
def get_cropped_image(image, east='frozen_east_text_detection.pb',
                      min_confidence=0.5, width=320, height=320):
    log('[LM] Cropping Begun:')
    if TESTING:
        cv.imshow('INPUT', image)
        cv.waitKey(0)

    image = normalizeImageSize(image)
    (h, w) = image.shape[:2]
    orig = image.copy()

    # Constants
    confidenceThreshold = min_confidence
    nmsThreshold = 0.4
    tb_padding = 0.1
    bb_padding = [0.005, 0.05]
    theta_thresh = 2
    m = 3

    log('[LM] Get scores 1')
    image = normalizeImageSize(image)
    (rects, confidences, baggage) = get_scores(image, east)
    log('[LM] Get scores 1 END')

    offsets = []
    thetas = []
    for b in baggage:
        offsets.append(b['offset'])
        thetas.append(b['angle'])

    incl = np.array(thetas)
    log('[DATA] Initial mean: ', degrees(np.mean(incl)))
    if incl.size == 0:
        log('[ERROR] No theta found in included', thetas)
        return None

    # discard angle outliers more than m standard deviations from the mean
    theta, std = np.mean(incl), np.std(incl)
    incl = incl[abs(incl - theta) / std <= m]
    theta = np.mean(incl)
    if degrees(theta) < theta_thresh:
        theta = 0
    log('[DATA] Updated Theta:', degrees(theta))
    log('[DATA] Inclinations and theta: ')
    log('THETAS:', len(np.array(thetas)))
    log('INCL: ', len(incl))

    res = rotate_image(orig.copy(), theta)
    if TESTING:
        cv.imshow('TEST', res)
        cv.waitKey(0)

    drawOn = orig.copy()

    # TODO: Delete this
    function = nms.felzenszwalb.nms
    indices = nms.boxes(rects, confidences,
                        nms_function=function,
                        confidence_threshold=confidenceThreshold,
                        nsm_threshold=nmsThreshold)
    drawrects = np.array(rects)[indices]
    drawBoxes(drawOn, drawrects, 1, 1, (0, 255, 0), 2)
    if TESTING:
        cv.imshow('DRAW', drawOn)
        cv.waitKey(0)

    log('[LM] Get scores 2')
    res = normalizeImageSize(res)
    (rects, confidences, baggage) = get_scores(res, east)
    log('[LM] Get scores 2 END')

    function = nms.felzenszwalb.nms
    start = time.time()
    indices = nms.boxes(rects, confidences,
                        nms_function=function,
                        confidence_threshold=confidenceThreshold,
                        nsm_threshold=nmsThreshold)
    end = time.time()
    indices = np.array(indices).reshape(-1)
    drawrects = np.array(rects)[indices]
    name = function.__module__.split('.')[-1].title()
    log("[INFO] {} NMS took {:.6f} seconds and found {} boxes".format(
        name, end - start, len(drawrects)))

    drawOn = res.copy()
    drawBoxes(drawOn, drawrects, 1, 1, (0, 255, 0), 2)
    if TESTING:
        cv.imshow('DRAWON AFTER ROTATION', drawOn)
        cv.waitKey(0)

    h, w = res.shape[:2]
    uLim = lambda x, m: x if x < m else m
    lLim = lambda x: x if x > 0 else 0

    drawrects = removeExtremes(drawrects)
    # bb_coords is (x1, y1, x2, y2) around all the surviving text boxes
    bb_coords = [
        int(min(drawrects[:, 0])),
        int(min(drawrects[:, 1])),
        int(max(drawrects[:, 0] + (1 + tb_padding) * drawrects[:, 2])),
        int(max(drawrects[:, 1] + (1 + tb_padding) * drawrects[:, 3]))
    ]
    bb_w = bb_coords[2] - bb_coords[0]
    bb_h = bb_coords[3] - bb_coords[1]
    text_box = res[lLim(bb_coords[1] - int(h * bb_padding[1] / 2)):uLim(bb_coords[3] + int(h * bb_padding[1] / 2), h),
                   lLim(bb_coords[0] - int(w * bb_padding[0] / 2)):uLim(bb_coords[2] + int(w * bb_padding[0] / 2), w)]
    if text_box.size == 0:
        log('[ERROR] Textbox not found')
        return None
    if TESTING:
        cv.imshow("Final Cropping", text_box)
        cv.waitKey(0)
    return text_box
def main(args):
    seq_name = args.seq_name

    # the packages of trackers
    from pysot.core.config import cfg  # use the modified config file to reset the tracking system
    from pysot.models.model_builder import ModelBuilder
    # modified single tracker with wrapper
    from mot_zj.MUST_sot_builder import build_tracker
    from mot_zj.MUST_utils import draw_bboxes, find_candidate_detection, handle_conflicting_trackers, sort_trackers
    from mot_zj.MUST_ASSO.MUST_asso_model import AssociationModel
    from mot_zj.MUST_utils import traj_interpolate

    dataset_dir = os.path.join(root, 'result')
    seq_type = 'img'
    # set the paths of the config parameters and model weights
    config_path = os.path.join(track_dir, "mot_zj", "MUST_config_file", "alex_config.yaml")
    model_params = os.path.join(params_dir, "alex_model.pth")
    # enable the visualisation or not
    is_visualisation = False
    # print the information of the tracking process or not
    is_print = True

    results_dir = os.path.join(dataset_dir, 'track')
    if not os.path.exists(results_dir):
        os.makedirs(results_dir)
    img_traj_dir = os.path.join(track_dir, "img_traj")
    if os.path.exists(os.path.join(img_traj_dir, seq_name)):
        shutil.rmtree(os.path.join(img_traj_dir, seq_name))

    seq_dir = os.path.join(dataset_dir, seq_type)
    seq_names = os.listdir(seq_dir)
    seq_num = len(seq_names)

    # record the processing time
    start_point = time.time()

    # load the config information
    cfg.merge_from_file(config_path)
    # set the flag that CUDA is available
    cfg.CUDA = torch.cuda.is_available()
    device = torch.device('cuda' if cfg.CUDA else 'cpu')

    # create the tracker model (Resnet50)
    track_model = ModelBuilder()
    # load tracker model
    track_model.load_state_dict(
        torch.load(model_params, map_location=lambda storage, loc: storage.cpu()))
    track_model.eval().to(device)

    # create association model
    asso_model = AssociationModel(args)

    seq_det_path = os.path.join(seq_dir, seq_name, 'det')
    seq_img_path = os.path.join(seq_dir, seq_name, 'img1')

    # print path and dataset information
    if is_print:
        print('preparing for the sequence: {}'.format(seq_name))
        print('-----------------------------------------------')
        print("detection result path: {}".format(seq_det_path))
        print("image files path: {}".format(seq_img_path))
        print('-----------------------------------------------')

    # read the detection results
    det_results = np.loadtxt(os.path.join(seq_det_path, 'det.txt'),
                             dtype=float, delimiter=',')
    # read images from each sequence
    images = sorted(glob.glob(os.path.join(seq_img_path, '*.jpg')))
    img_num = len(images)

    # the container of trackers
    trackers = []

    # visualisation settings
    if is_visualisation:
        cv2.namedWindow(seq_name, cv2.WINDOW_NORMAL)

    # init (reset) the identifier
    id_num = 0

    # tracking process in each frame
    for nn, im_path in enumerate(images):
        each_start = time.time()
        frame = nn + 1
        img = cv2.imread(im_path)
        print('Frame {} is loaded'.format(frame))

        # load the detection results of this frame
        pre_frame_det_results = det_results[det_results[:, 0] == frame]
        # non-maximum suppression [frame, id, x, y, w, h, score]
        indices = nms.boxes(pre_frame_det_results[:, 2:6],
                            pre_frame_det_results[:, 6])
        frame_det_results = pre_frame_det_results[indices, :]
        # extract the bbox [fr, id, (x, y, w, h), score]
        bboxes = frame_det_results[:, 2:6]

        ############################################
        #  ***multiple tracking and associating*** #
        ############################################
        # 1. sort trackers
        index1, index2 = sort_trackers(trackers)
        # 2. save the processed index of trackers
        index_processed = []
        track_time = 0
        asso_time = 0
        for k in range(2):
            # process trackers in the first or the second class
            if k == 0:
                index_track = index1
            else:
                index_track = index2

            track_start = time.time()
            for ind in index_track:
                if trackers[ind].track_state == cfg.STATE.TRACKED or trackers[ind].track_state == cfg.STATE.ACTIVATED:
                    indices = find_candidate_detection(
                        [trackers[i] for i in index_processed], bboxes)
                    to_track_bboxes = bboxes[indices, :] if not bboxes.size == 0 else np.array([])
                    # MOT_track (tracking process)
                    trackers[ind].track(img, to_track_bboxes, frame)
                    # if the tracker keeps its previous tracking state (tracked or activated)
                    if trackers[ind].track_state == cfg.STATE.TRACKED or trackers[ind].track_state == cfg.STATE.ACTIVATED:
                        index_processed.append(ind)
            track_time += time.time() - track_start

            asso_start = time.time()
            for ind in index_track:
                if trackers[ind].track_state == cfg.STATE.LOST:
                    indices = find_candidate_detection(
                        [trackers[i] for i in index_processed], bboxes)
                    to_associate_bboxes = bboxes[indices, :] if not bboxes.size == 0 else np.array([])
                    # MOT_track (association process); pass the association
                    # candidates, not the tracking candidates from the loop above
                    trackers[ind].track(img, to_associate_bboxes, frame)
                    # add process flag
                    index_processed.append(ind)
            asso_time += time.time() - asso_start

        ############################################
        #          ***init new trackers***         #
        ############################################
        # find the candidate bboxes to init new trackers
        indices = find_candidate_detection(trackers, bboxes)
        # process the tracker: init (1st frame) and track method (the other frames)
        for index in indices:
            id_num += 1
            new_tracker = build_tracker(track_model)
            new_tracker.init(img, bboxes[index, :], id_num, frame, seq_name, asso_model)
            trackers.append(new_tracker)

        # find conflict of trackers (I need to know what conflict)
        trackers = handle_conflicting_trackers(trackers, bboxes)

        # interpolate the tracklet results
        for tracker in trackers:
            if tracker.track_state == cfg.STATE.TRACKED or tracker.track_state == cfg.STATE.ACTIVATED:
                bbox = tracker.tracking_bboxes[-1, :]
                traj_interpolate(tracker, bbox, tracker.frames[-1], 30)

        ############################################
        #       ***collect tracking results***     #
        ############################################
        # collect the tracking results (all the results, without selection)
        if frame == len(images):
            results_bboxes = np.array([])
            for tracker in trackers:
                if results_bboxes.size == 0:
                    results_bboxes = tracker.results_return()
                else:
                    res = tracker.results_return()
                    if not res.size == 0:
                        results_bboxes = np.concatenate(
                            (results_bboxes, tracker.results_return()), axis=0)

            # test code segment
            filename = '{}.txt'.format(seq_name)
            results_bboxes = results_bboxes[np.argsort(results_bboxes[:, 0])]
            print(results_bboxes.shape[0])

            # detections filter
            indices = []
            if seq_name == 'b1':
                for ind, result in enumerate(results_bboxes):
                    if result[3] > 540:
                        if result[4] * result[5] < 10000:
                            indices.append(ind)
            results_bboxes = np.delete(results_bboxes, indices, axis=0)
            np.savetxt(os.path.join(results_dir, filename), results_bboxes,
                       fmt='%d,%d,%.1f,%.1f,%.1f,%.1f')

        ############################################
        #         ***crop tracklet image***        #
        ############################################
        for tracker in trackers:
            if tracker.track_state == cfg.STATE.START or tracker.track_state == cfg.STATE.TRACKED or tracker.track_state == cfg.STATE.ACTIVATED:
                bbox = tracker.tracking_bboxes[-1, :]
                x1 = int(np.floor(np.maximum(1, bbox[0])))
                y1 = int(np.ceil(np.maximum(1, bbox[1])))
                x2 = int(np.ceil(np.minimum(img.shape[1], bbox[0] + bbox[2])))
                y2 = int(np.ceil(np.minimum(img.shape[0], bbox[1] + bbox[3])))
                img_traj = img[y1:y2, x1:x2, :]
                traj_path = os.path.join(img_traj_dir, seq_name, str(tracker.id_num))
                if not os.path.exists(traj_path):
                    os.makedirs(traj_path)
                tracklet_img_path = os.path.join(traj_path, str(tracker.frames[-1]))
                cv2.imwrite("{}.jpg".format(tracklet_img_path), img_traj)

        each_time = time.time() - each_start
        print("period: {}s, track: {}s({:.2f}), asso: {}s({:.2f})".format(
            each_time, track_time, (track_time / each_time) * 100,
            asso_time, (asso_time / each_time) * 100))

        if is_visualisation:
            ##########################################
            #   information print and visualisation  #
            ##########################################
            # print("The number of new trackers: {}".format(len(indices)))
            active_trackers = [
                trackers[i].id_num for i in range(len(trackers))
                if trackers[i].track_state == cfg.STATE.ACTIVATED
                or trackers[i].track_state == cfg.STATE.TRACKED
                or trackers[i].track_state == cfg.STATE.LOST
            ]
            print("The number of active trackers: {}".format(len(active_trackers)))
            print(active_trackers)
            anno_img = draw_bboxes(img, bboxes)
            cv2.imshow(seq_name, anno_img)
            cv2.waitKey(1)

    print("The running time is: {} s".format(time.time() - start_point))
    print("The total processing time is: {} s".format(time.time() - start_point))
def batch_analyze(self, images=None, *args, **kwargs):
    """
    Analyze a batch of images
    :param images:
    :return:
    """
    assert images is not None, ValueError('images can not be None')
    with torch.no_grad():
        if self.device == 'cuda':
            torch.cuda.synchronize()
        _images = []
        for image in images:
            _images.append(F.to_tensor(image).to(self.device))
            del image
        l_images = len(images)
        del images

        output = self.model(_images)
        _images = []
        del _images

        if 'use_listmemmap' in kwargs:
            f_out = ListMemMap()
        else:
            f_out = List()

        for idx in range(len(output)):
            for key, item in output[idx].items():
                if self.device == 'cuda':
                    item = item.cpu()
                output[idx][key] = item.detach().numpy()
                del item

            # nms.boxes expects (x, y, w, h) rects, so convert from corners
            boxes = [[x1, y1, x2 - x1, y2 - y1]
                     for x1, y1, x2, y2 in output[idx]['boxes']]
            scores = output[idx]['scores']
            rects = nms.boxes(rects=boxes, scores=scores, nms_threshold=0.25)

            tmp = list()
            tmp.append([
                output[idx]['boxes'][i] for i in rects
                if output[idx]['scores'][i] > 0.5
            ])
            tmp.append([
                output[idx]['labels'][i] for i in rects
                if output[idx]['scores'][i] > 0.5
            ])
            tmp.append([
                output[idx]['scores'][i] for i in rects
                if output[idx]['scores'][i] > 0.5
            ])
            f_out.append(tmp)
            del tmp

        del output, l_images, boxes, scores, rects
        if self.device == 'cuda':
            torch.cuda.empty_cache()
        gc.collect()
    return f_out
def text_detection(image, east, min_confidence, width, height):
    image = cv2.imread(image)
    orig = image.copy()
    (origHeight, origWidth) = image.shape[:2]

    (newW, newH) = (width, height)
    ratioWidth = origWidth / float(newW)
    ratioHeight = origHeight / float(newH)

    image = cv2.resize(image, (newW, newH))
    (imageHeight, imageWidth) = image.shape[:2]

    layerNames = ["feature_fusion/Conv_7/Sigmoid", "feature_fusion/concat_3"]

    # original image
    cv2.imshow("Original", orig)

    print("[INFO] Loading the EAST text detector...")
    net = cv2.dnn.readNet(east)

    blob = cv2.dnn.blobFromImage(image, 1.0, (imageWidth, imageHeight),
                                 (123.68, 116.78, 103.94), swapRB=True, crop=False)
    start = time.time()
    net.setInput(blob)
    (scores, geometry) = net.forward(layerNames)
    end = time.time()
    print("[INFO] Detection took: {:.6f} seconds".format(end - start))

    confidenceThreshold = min_confidence
    nmsThreshold = 0.4

    (rects, confidences, baggage) = decode(scores, geometry, confidenceThreshold)

    offsets = []
    thetas = []
    for b in baggage:
        offsets.append(b['offset'])
        thetas.append(b['angle'])

    functions = [nms.felzenszwalb.nms, nms.fast.nms, nms.malisiewicz.nms]
    # print("[INFO] Running nms.boxes . . .")
    for i, function in enumerate(functions):
        start = time.time()
        indices = nms.boxes(rects, confidences,
                            nms_function=function,
                            confidence_threshold=confidenceThreshold,
                            nsm_threshold=nmsThreshold)
        end = time.time()

        indices = np.array(indices).reshape(-1)
        drawrects = np.array(rects)[indices]
        name = function.__module__.split('.')[-1].title()
        print("[INFO] The {} method took {:.6f} seconds and found {} boxes".format(
            name, end - start, len(drawrects)))

        drawOn = orig.copy()
        drawBoxes(drawOn, drawrects, ratioWidth, ratioHeight, (0, 255, 0), 2)
        title = "nms.boxes {}".format(name)
        cv2.imshow(title, drawOn)
        cv2.moveWindow(title, 150 + i * 300, 350)

    cv2.waitKey(0)
    """
def non_max_suppression(prediction, conf_thres=0.1, iou_thres=0.6, merge=False,
                        classes=None, agnostic=False):
    """Performs Non-Maximum Suppression (NMS) on inference results

    Returns:
        detections with shape: nx6 (x1, y1, x2, y2, conf, cls)
    """
    nc = prediction[0].shape[1] - 5  # number of classes
    xc = prediction[..., 4] > conf_thres  # candidates

    # Settings
    min_wh, max_wh = 2, 4096  # (pixels) minimum and maximum box width and height
    max_det = 300  # maximum number of detections per image
    time_limit = 10.0  # seconds to quit after
    redundant = True  # require redundant detections
    multi_label = nc > 1  # multiple labels per box (adds 0.5ms/img)

    t = time.time()
    output = [None] * prediction.shape[0]
    for xi, x in enumerate(prediction):  # image index, image inference
        # Apply constraints
        # x[((x[..., 2:4] < min_wh) | (x[..., 2:4] > max_wh)).any(1), 4] = 0  # width-height
        x = x[xc[xi]]  # confidence

        # If none remain process next image
        if not x.shape[0]:
            continue

        # Compute conf
        x[:, 5:] *= x[:, 4:5]  # conf = obj_conf * cls_conf

        # Box (center x, center y, width, height) to (x1, y1, x2, y2)
        box = xywh2xyxy(x[:, :4])

        # Detections matrix nx6 (xyxy, conf, cls)
        if multi_label:
            i, j = (x[:, 5:] > conf_thres).nonzero()
            x = np.concatenate(
                (box[i], x[i, j + 5, None], j[:, None].astype(np.float32)),
                axis=1)
        else:  # best class only (numpy equivalents of the torch max/view calls)
            conf = x[:, 5:].max(axis=1, keepdims=True)
            j = x[:, 5:].argmax(axis=1)[:, None].astype(np.float32)
            x = np.concatenate((box, conf, j), axis=1)[conf.reshape(-1) > conf_thres]

        # Filter by class
        if classes:
            x = x[(x[:, 5:6] == np.array(classes)).any(1)]

        # Apply finite constraint
        # if not np.isfinite(x).all():
        #     x = x[np.isfinite(x).all(1)]

        # If none remain process next image
        n = x.shape[0]  # number of boxes
        if not n:
            continue

        # Sort by confidence
        # x = x[x[:, 4].argsort()[::-1]]

        # Batched NMS
        c = x[:, 5:6] * (0 if agnostic else max_wh)  # classes
        boxes, scores = x[:, :4] + c, x[:, 4]  # boxes (offset by class), scores
        rects = xyxy2xywh(boxes)  # nms.boxes expects top-left (x, y, w, h) rects
        i = np.array(nms.boxes(rects, scores, nms_threshold=iou_thres), dtype=int)
        if len(i) > max_det:  # limit detections
            i = i[:max_det]
        if merge and (1 < n < 3e3):  # Merge NMS (boxes merged using weighted mean)
            try:  # update boxes as boxes(i,4) = weights(i,n) * boxes(n,4)
                # IoU is computed on the corner-format boxes, not the rects
                iou = box_iou(boxes[i], boxes) > iou_thres  # iou matrix
                weights = iou * scores[None]  # box weights
                x[i, :4] = (weights @ x[:, :4]) / weights.sum(1, keepdims=True)  # merged boxes
                if redundant:
                    i = i[iou.sum(1) > 1]  # require redundancy
            except Exception:  # possible CUDA error https://github.com/ultralytics/yolov3/issues/1139
                print(x, i, x.shape, i.shape)
                pass

        output[xi] = x[i]
        if (time.time() - t) > time_limit:
            break  # time limit exceeded

    return output
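# Minimal NumPy sketches of the helpers the function above assumes but does
# not define: the box conversions and a pairwise IoU. Note the ultralytics
# xyxy2xywh returns center-format boxes; since nms.boxes wants a top-left
# (x, y, w, h) rect, a top-left variant is sketched here instead.
import numpy as np

def xywh2xyxy(x):
    # (cx, cy, w, h) -> (x1, y1, x2, y2)
    y = np.copy(x)
    y[:, 0] = x[:, 0] - x[:, 2] / 2
    y[:, 1] = x[:, 1] - x[:, 3] / 2
    y[:, 2] = x[:, 0] + x[:, 2] / 2
    y[:, 3] = x[:, 1] + x[:, 3] / 2
    return y

def xyxy2xywh(x):
    # (x1, y1, x2, y2) -> top-left (x, y, w, h)
    y = np.copy(x)
    y[:, 2] = x[:, 2] - x[:, 0]
    y[:, 3] = x[:, 3] - x[:, 1]
    return y

def box_iou(a, b):
    # pairwise IoU between an (m, 4) and an (n, 4) set of (x1, y1, x2, y2) boxes
    area_a = (a[:, 2] - a[:, 0]) * (a[:, 3] - a[:, 1])
    area_b = (b[:, 2] - b[:, 0]) * (b[:, 3] - b[:, 1])
    lt = np.maximum(a[:, None, :2], b[None, :, :2])  # top-left of intersection
    rb = np.minimum(a[:, None, 2:], b[None, :, 2:])  # bottom-right of intersection
    wh = np.clip(rb - lt, 0, None)
    inter = wh[..., 0] * wh[..., 1]
    return inter / (area_a[:, None] + area_b[None, :] - inter)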
def text_detect(image, min_confidence, width, height):
    # load the input image and grab the image dimensions
    image = cv2.imread(image)
    orig = image.copy()
    (origHeight, origWidth) = image.shape[:2]

    # set the new width and height and then determine the ratio in change
    # for both the width and height
    (newW, newH) = (width, height)
    ratioWidth = origWidth / float(newW)
    ratioHeight = origHeight / float(newH)

    # resize the image and grab the new image dimensions
    image = cv2.resize(image, (newW, newH))
    (imageHeight, imageWidth) = image.shape[:2]

    # define the two output layer names for the EAST detector model that
    # we are interested in -- the first is the output probabilities and the
    # second can be used to derive the bounding box coordinates of text
    layerNames = ["feature_fusion/Conv_7/Sigmoid", "feature_fusion/concat_3"]

    # load the pre-trained EAST text detector
    # print("[INFO] loading EAST text detector...")
    net = text_model

    # construct a blob from the image and then perform a forward pass of
    # the model to obtain the two output layer sets
    blob = cv2.dnn.blobFromImage(image, 1.0, (imageWidth, imageHeight),
                                 (123.68, 116.78, 103.94), swapRB=True, crop=False)
    start = time.time()
    net.setInput(blob)
    (scores, geometry) = net.forward(layerNames)
    end = time.time()

    # NMS on the unrotated rects
    confidenceThreshold = min_confidence
    nmsThreshold = 0.4

    # decode the blob info
    (rects, confidences, baggage) = decode(scores, geometry, confidenceThreshold)

    offsets = []
    thetas = []
    for b in baggage:
        offsets.append(b['offset'])
        thetas.append(b['angle'])

    ##########################################################

    functions = [nms.felzenszwalb.nms, nms.fast.nms, nms.malisiewicz.nms]

    # count the rect-based detections (the two counters were swapped
    # in the original; the return order is unchanged)
    rects_count, polys_count = 0, 0
    for i, function in enumerate(functions):
        indices = nms.boxes(rects, confidences,
                            nms_function=function,
                            confidence_threshold=confidenceThreshold,
                            nsm_threshold=nmsThreshold)
        indices = np.array(indices).reshape(-1)
        if len(indices) != 0:
            drawrects = np.array(rects)[indices]
            rects_count = len(drawrects)

    # convert rects to polys and count the polygon-based detections
    polygons = utils.rects2polys(rects, thetas, offsets, ratioWidth, ratioHeight)
    for i, function in enumerate(functions):
        start = time.time()
        indices = nms.polygons(polygons, confidences,
                               nms_function=function,
                               confidence_threshold=confidenceThreshold,
                               nsm_threshold=nmsThreshold)
        end = time.time()
        indices = np.array(indices).reshape(-1)
        if len(indices) != 0:
            drawpolys = np.array(polygons)[indices]
            polys_count = len(drawpolys)

    return [rects_count, polys_count]