def match_by_max_pmcc(tiles_dir, working_dir, jar_file): # create a workspace directory if not found if not os.path.exists(working_dir): os.makedirs(working_dir) tile_files = glob.glob(os.path.join(tiles_dir, '*')) tiles = load_entire_data(tile_files) # TODO: add all tiles to a kd-tree so it will be faster to find overlap between tiles # iterate over the tiles, and for each tile, find intersecting tiles that overlap, # and match their features # Nested loop: # for each tile_i in range[0..N): # for each tile_j in range[tile_i..N)] for pair in itertools.combinations(tiles, 2): # if the two tiles intersect, match them bbox1 = BoundingBox(tiles[pair[0]]['boundingBox']) bbox2 = BoundingBox(tiles[pair[1]]['boundingBox']) if bbox1.overlap(bbox2): print "Matching by max pmcc tiles: {0} and {1}".format( pair[0], pair[1]) match_two_tiles_by_max_pmcc_features(tiles[pair[0]], tiles[pair[1]], jar_file, working_dir)
def test_overlaps_xz_overlap(self): a = BoundingBox(0, 0, 0, 10, 10, 10) b = BoundingBox(5, 15, 9.5, 30, 30, 30) self.assertTrue(a.overlaps(b)) self.assertEqual(a.overlaps(b), b.overlaps(a)) a = BoundingBox(0, 0, 0, 10, 10, 10) b = BoundingBox(11, 15, 9.5, 30, 30, 30) self.assertFalse(a.overlaps(b)) self.assertEqual(a.overlaps(b), b.overlaps(a))
def _generate_boxes(self) -> None: for i in tqdm(range(len(self._templates))): h, w = np.array(self._templates[i].shape) * self._best_scale if self._is_staff: self._boxes.append([ BoundingBox(0, pt[0], self._reader.img_width, h) for pt in zip(*self._best_locations[i]) ]) else: self._boxes.append([ BoundingBox(pt[1], pt[0], w, h) for pt in zip(*self._best_locations[i]) ]) self._boxes = [j for i in self._boxes for j in i]
def fg_bboxes(seg, frame_id, params): bboxes = [] roi = cv2.imread(params['roi_path'], cv2.IMREAD_GRAYSCALE) / 255 segmentation = seg * roi contours, _ = cv2.findContours(segmentation.astype(np.uint8), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) idx = 0 for c in contours: rect = cv2.boundingRect(c) if rect[2] < 50 or rect[3] < 50 or rect[2] / rect[3] < 0.8: continue # Discard small contours x, y, w, h = rect # TODO: pillar bicis a gt bboxes.append( BoundingBox(id=idx, label='car', frame=frame_id, xtl=x, ytl=y, xbr=x + w, ybr=y + h)) idx += 1 return discard_overlapping_bboxes(bboxes)
def read_annotations(path, grouped=True, use_parked=False): with open(path) as f: tracks = xmltodict.parse(f.read())['annotations']['track'] annotations = [] for track in tracks: id = track['@id'] label = track['@label'] if label != 'car': continue for box in track['box']: is_parked = box['attribute']['#text'] == 'true' if not use_parked and is_parked: continue annotations.append(BoundingBox( id=int(id), label=label, frame=int(box['@frame']), xtl=float(box['@xtl']), ytl=float(box['@ytl']), xbr=float(box['@xbr']), ybr=float(box['@ybr']), occluded=box['@occluded'] == '1', parked=is_parked )) if grouped: return group_by_frame(annotations) return annotations
def read_detections(path, grouped=False, confidenceThr=0.5): """ Format: <frame>, <id>, <bb_left>, <bb_top>, <bb_width>, <bb_height>, <conf>, <x>, <y>, <z> """ with open(path) as f: lines = f.readlines() detections = [] for line in lines: det = line.split(sep=',') if float(det[6]) >= confidenceThr: detections.append(BoundingBox( id=int(det[1]), label='car', frame=int(det[0]) - 1, xtl=float(det[2]), ytl=float(det[3]), xbr=float(det[2]) + float(det[4]), ybr=float(det[3]) + float(det[5]), confidence=float(det[6]) )) if grouped: return group_by_frame(detections) return detections
def detect(self, image): resizedImage = Image.fromarray(image).resize( (self._inputWidth, self._inputHeight), Image.ANTIALIAS) self._setInputTensor(resizedImage) startTime = time.monotonic() self._interpreter.invoke() elapsedTime = (time.monotonic() - startTime) * 1000 logging.info("%.0f ms", elapsedTime) boxes = self._getOutputTensor(0) classes = self._getOutputTensor(1) scores = self._getOutputTensor(2) count = int(self._getOutputTensor(3)) results = [] for i in range(count): if scores[i] < self._scoreThreshold: continue if classes[i] not in self._interestedClasses: continue ymin, xmin, ymax, xmax = boxes[i] xmin = int(xmin * image.shape[1]) xmax = int(xmax * image.shape[1]) ymin = int(ymin * image.shape[0]) ymax = int(ymax * image.shape[0]) results.append(BoundingBox(xmin, ymin, xmax, ymax)) return results
def bounding_box_naive_handler(event): box = BoundingBox(convex_hull) plot_data.clean_plot(which='lines') box.append(box[0]) ax.plot(*zip(*box)) fig.canvas.draw() fig.canvas.flush_events()
def load_tiles(tiles_spec_fname, bbox): relevant_tiles = [] with open(tiles_spec_fname, 'r') as data_file: data = json.load(data_file) for tile in data: tile_bbox = BoundingBox(tile['boundingBox']) if bbox.overlap(tile_bbox): relevant_tiles.append(tile) return relevant_tiles
def _json_to_bboxes(line): bboxes = [] json_dicts = json.loads(line) for curr_dict in json_dicts: bbox = BoundingBox(curr_dict["left"], curr_dict["right"], curr_dict["top"], curr_dict["bottom"], curr_dict["label"]) bboxes.append(bbox) return bboxes
def get_bounding_box(X): min_lat = min(X[:, 0]) max_lat = max(X[:, 0]) min_lon = min(X[:, 1]) max_lon = max(X[:, 1]) bbox = BoundingBox(min_lon=min_lon - 0.05*(max_lon-min_lon), min_lat=min_lat - 0.05*(max_lat-min_lat), max_lon=max_lon + 0.05*(max_lon-min_lon), max_lat=max_lat + 0.05*(max_lat-min_lat)) return bbox
def get_background_map_image_and_bounding_box(X): try: # Fetch map using TomTom Maps API bbox = get_bounding_box(X) background_image = get_tomtom_map_raster_image(bbox.min_lon, bbox.min_lat, bbox.max_lon, bbox.max_lat) except OSError as e: # Alternatively, use the offline version print('Error: "{}"; using pre-fetched image...'.format(e)) bbox = BoundingBox(20.39, 44.78, 20.52, 44.84) background_image = plt.imread('staticimage.png') return background_image, bbox
def __init__(self, solid): assert isinstance(solid, TopoDS_Solid) self._solid = copy.deepcopy(solid) props = GlobalProperties(solid) x1, y1, z1, x2, y2, z2 = props.bbox() xmin = min(x1, x2) xmax = max(x1, x2) ymin = min(y1, y2) ymax = max(y1, y2) zmin = min(z1, z2) zmax = max(z1, z2) self._bbox = BoundingBox(xmin, ymin, zmin, xmax, ymax, zmax)
def detect(self, frame: VideoFrame, input_w=256, input_h=256, class_threshold=0.6, labels=["person"], anchors=[[116, 90, 156, 198, 373, 326], [30, 61, 62, 45, 59, 119], [10, 13, 16, 30, 33, 23]]): """ Bemeneti parméterek: input_w/h: modell bemeneti mérete class_treshold: ennyi konfidencia felett tartjuk meg a jelölt osztályokat labels: ezeket ismeri fel (be lehet rakni csomó mindent, fun) anchors: ezek alapján képzi le a BB-ket Feldolgozás lépései: 1. Kép betöltése, előfeldolgozása 2. Modell futtatása 3. BoundigBox-ok előállítása 4. BB méret korrekció 5. átfedések kezelése 4. BB címkézése Kimenet: boxes: befoglaló doboz labels: predikált osztály (nálunk ugye ez mindig person lesz, ezért kivehető akár) scores: ~konfidencia """ image, image_w, image_h = self.load_image_pixels( frame, (input_w, input_h)) yhat = self.model.predict(image) boxes = list() for i in range(len(yhat)): boxes += yolo.decode_netout(yhat[i][0], anchors[i], class_threshold, input_h, input_w) self.correct_yolo_boxes(boxes, image_h, image_w, input_h, input_w) yolo.do_nms(boxes, 0.5) boxes, labels, scores = self.get_boxes(boxes, labels, class_threshold) ret_boxes = [] for box in boxes: y1, x1, y2, x2 = box.ymin, box.xmin, box.ymax, box.xmax width, height = x2 - x1, y2 - y1 ret_boxes.append(BoundingBox(x1, y1, width, height)) return ret_boxes, scores
def to_bounding_box( gt_boxes: List[Dict[str, torch.Tensor]], pred_boxes: List[Dict[str, torch.Tensor]] ) -> Tuple[List[BoundingBox], List[BoundingBox]]: gt_parsed_boxes, pred_parsed_boxes = [], [] # for each image for i, (gts, preds) in enumerate(zip(gt_boxes, pred_boxes)): image_name = str(i) # process ground truth bounding boxes boxes = list(gts['boxes'].detach().cpu().numpy()) classes = list(gts['labels'].detach().cpu().numpy()) for box, box_class in zip(boxes, classes): coords = tuple(box) b = BoundingBox(image_name=image_name, class_id=box_class, coordinates=coords, type_coordinates=CoordinatesType.ABSOLUTE, bb_type=BBType.GROUND_TRUTH, format=BBFormat.XYX2Y2) gt_parsed_boxes.append(b) # process detections boxes = list(preds['boxes'].detach().cpu().numpy()) classes = list(preds['labels'].detach().cpu().numpy()) conf = list(preds['scores'].detach().cpu().numpy()) for box, box_class, box_confidence in zip(boxes, classes, conf): coords = tuple(box) b = BoundingBox(image_name=image_name, class_id=box_class, coordinates=coords, confidence=box_confidence, type_coordinates=CoordinatesType.ABSOLUTE, bb_type=BBType.DETECTED, format=BBFormat.XYX2Y2) pred_parsed_boxes.append(b) return gt_parsed_boxes, pred_parsed_boxes
def filter_tiles(tiles_fname, working_dir, bbox): # parse the bounding box arguments bbox = BoundingBox(bbox) # create a workspace directory if not found if not os.path.exists(working_dir): os.makedirs(working_dir) # load all tiles from the tile-spec json file that are relevant to our bounding box relevant_tiles = load_tiles(tiles_fname, bbox) # Create a tile-spec file for each relevant tile create_single_tile_specs(relevant_tiles, working_dir)
def add_noise(annotations, noise_params, num_frames): # to generate a BB randomly in an image, we use the mean width and # height of the annotated BBs so that they have similar statistics mean_w = 0 mean_h = 0 noisy_annotations = [] # add noise to existing BBs (annotations) for box in annotations: # remove BB if np.random.random() <= noise_params['drop']: continue # generate BB close to an existing one if np.random.random() <= noise_params['generate_close']: new_box = deepcopy(box) noise['gaussian'](new_box, noise_params) noisy_annotations.append(new_box) b = deepcopy(box) # add noise to existing BB if noise_params['type'] is not None: noise[noise_params['type']](b, noise_params) noisy_annotations.append(b) mean_w += box.width mean_h += box.height mean_w /= len(annotations) mean_h /= len(annotations) # generate random BBs in random frames for frame_id in range(num_frames): if np.random.random() <= noise_params['generate_random']: # center of the BB (cx, cy), width and height (w, h) cx = np.random.randint(mean_w // 2, img_shape[1] - mean_w // 2) cy = np.random.randint(mean_h // 2, img_shape[0] - mean_h // 2) w = np.random.normal(mean_w, 10) h = np.random.normal(mean_h, 10) noisy_annotations.append( BoundingBox(id=-1, label='car', frame=frame_id, xtl=cx - w / 2, ytl=cy - h / 2, xbr=cx + w / 2, ybr=cy + h / 2)) return noisy_annotations
def detect_russian_word(self, color_image, depth_image): """ Detect words in given image. Returns ------- A list of box objects that contain desired text """ # filter image _, filter_image = cv2.threshold(np.mean(color_image, axis=2), 185, 255, cv2.THRESH_BINARY) # shows what the filtered image looks like # cv2.imshow('img', filter_image) # cv2.waitKey(0) ## only return boxes that have text in them ## eg. find a way to check if boxes are repetitive or do not contain text d = pytesseract.image_to_data(filter_image, output_type=pytesseract.Output.DICT, lang="uzb_cyrl") n_boxes = len(d['level']) box_obs = [] contents = d['text'] # print(contents) for i in range(n_boxes): if not contents[i]: continue else: for j in contents[i]: if j in self.text: # print(contents[i]) (x, y, w, h) = (d['left'][i], d['top'][i], d['width'][i], d['height'][i]) # cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 0), 2) verts = [(x, y), (x + w, y), (x, y + h), (x + w, y + h)] cv2.rectangle(filter_image, verts[0], verts[-1], (0, 255, 0), 2) box = BoundingBox(verts, ObjectType('text')) box_obs.append(box) break # cv2.imshow('img', filter_image) # cv2.waitKey(0) return box_obs
def split_heights(bounding_boxes, evaled_avg_line_height): idx = 0 splitted = [] while idx < len(bounding_boxes) - 1: box = bounding_boxes[idx] height = box.h n = int(height / evaled_avg_line_height) if n > 1: new_height = int(round(height / n)) bounding_boxes.pop(idx) splitted += [BoundingBox((box.x, box.y + incr, box.w, new_height)) for incr in range(0, int(new_height * n), new_height)] else: idx += 1 return sorted(bounding_boxes + splitted, key=lambda x: x.y)
def detections_from_csv(csv_filepath: str) -> List[Detection]: """ reads a detection list from csv file :param csv_filepath: csv file path :return: detections list """ detections = list() with open(csv_filepath, newline='') as csvfile: reader = csv.DictReader(csvfile) for row in reader: top_right_coord = Coordinate(int(row[X_TOP_RIGHT_KEY]), int(row[Y_TOP_RIGHT_KEY])) bb = BoundingBox(top_right_coord, int(row[BB_W]), int(row[BB_H])) detection = Detection(bb, float(row[SCORE])) detections.append(detection) return detections
def getBoundingBoxPoints(self): """ :returns: All four bounding box points starting at top left -> top right -> bottom right -> bottom left """ firstJoint: Joint = self[0] resultBox = BoundingBox() resultBox.topLeft = ( int(firstJoint.x_top_left_BB) , int(firstJoint.y_top_left_BB) ) resultBox.topRight = ( int(firstJoint.x_bottom_right_BB) , int(firstJoint.y_top_left_BB) ) resultBox.bottomRight = ( int(firstJoint.x_bottom_right_BB) , int(firstJoint.y_bottom_right_BB) ) resultBox.bottomLeft = ( int(firstJoint.x_top_left_BB) , int(firstJoint.y_bottom_right_BB) ) return resultBox
def detect(self, image): startTime = time.monotonic() bodies, _, weights = self._faceCascade.detectMultiScale3( image, 1.5, 5, outputRejectLevels=True) elapsedTime = (time.monotonic() - startTime) * 1000 logging.info("%.0f ms", elapsedTime) if len(bodies): logging.debug("Bodies: %s / Weights: %s", bodies, weights) else: return [] results = [] for x, y, w, h in bodies: results.append(BoundingBox(x, y, x + w, y + h)) return results
def split_widths(bounding_boxes, evaled_avg_letter_width): idx = 0 splitted = [] while idx < len(bounding_boxes) - 1: box = bounding_boxes[idx] width = box.w n = round(width / evaled_avg_letter_width) if n > 1: new_width = int(round(width / n)) bounding_boxes.pop(idx) splitted += [ BoundingBox((box.x + incr, 0, new_width, box.h)) for incr in range(0, int(n * new_width), int(new_width)) ] else: idx += 1 return sorted(bounding_boxes + splitted, key=lambda x: x.x)
def update_bbox(self, bbox): img_h, img_w, _ = self.image.shape offset_x1 = (bbox[0] - 0.25) * img_w offset_x2 = (bbox[2] - 0.75) * img_w offset_y1 = (bbox[1] - 0.25) * img_h offset_y2 = (bbox[3] - 0.75) * img_h if not self.predicted_bbox: new_x1 = self.roi.x1 + offset_x1 new_x2 = self.roi.x2 + offset_x2 new_y1 = self.roi.y1 + offset_y1 new_y2 = self.roi.y2 + offset_y2 self.predicted_bbox = BoundingBox(new_x1, new_y1, new_x2, new_y2) else: new_x1 = self.predicted_bbox.x1 + offset_x1 new_x2 = self.predicted_bbox.x2 + offset_x2 new_y1 = self.predicted_bbox.y1 + offset_y1 new_y2 = self.predicted_bbox.y2 + offset_y2 self.predicted_bbox.update_coordinates(new_x1, new_x2, new_y1, new_y2)
def read_kitti_data(image_id, image_path, label_path, stride): """Reads groundtruth data from a Kitti annotation file.""" with open(label_path) as file: data = file.read() bboxes = [] lines = data.splitlines() for line in lines: line = line.split() if line[0] in LABEL_IDS: bboxes.append( BoundingBox(line[4], line[5], line[6], line[7], LABEL_IDS[line[0]])) return SSDImage(image_id, image_path, stride, len(LABELS), bboxes=bboxes)
def cut_lines(image, for_crnn=True): # Copy for labelling labelled = image.copy() # Grayscale gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) # Binary _, thresh = cv2.threshold(gray,127,255,cv2.THRESH_BINARY_INV) # Dilation kernel = np.ones((3, 15 if for_crnn else 100), np.uint8) img_dilation = cv2.dilate(thresh, kernel, iterations=1) # Find, sort contours ctrs, _ = cv2.findContours(img_dilation.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) sorted_ctrs = sorted(ctrs, key=lambda ctr: cv2.boundingRect(ctr)[1]) # Cut out lines bounding_boxes = [] for i, ctr in enumerate(sorted_ctrs): x, y, w, h = cv2.boundingRect(ctr) bounding_boxes.append(BoundingBox((x, y, w, h))) height, width, _ = image.shape bounding_boxes = seperate_n_lines(bounding_boxes) lines = [] boxes = [] for bounding_box in bounding_boxes: x, y, w, h = bounding_box.x, bounding_box.y, bounding_box.w, bounding_box.h size_dilate_val = int(h / 7) ny = max(y - size_dilate_val, 0) ny2 = min(y + h + size_dilate_val, height) nx = max(x - size_dilate_val, 0) nx2 = min(x + w + size_dilate_val, width) line = image[ny:ny2, nx:nx2] lines.append(line) cv2.rectangle(labelled, (nx, ny), (nx2, ny2), (90, 0, 255), 2) boxes.append((nx, ny, nx2-nx, ny2-ny)) return labelled, lines, boxes
def cluster_bounding_boxes(image, bounding_boxes, threshold): # draw_img = np.copy(image) heat = np.zeros_like(image[:, :, 0]).astype(np.float) heat = add_heat(heat, bounding_boxes) heat = apply_threshold(heat, threshold) heatmap = np.clip(heat, 0, 255) labels = get_labels(heatmap) bboxes = [] for car_number in range(1, labels[1] + 1): # Find pixels with each car_number label value nonzero = (labels[0] == car_number).nonzero() # Identify x and y values of those pixels nonzeroy = np.array(nonzero[0]) nonzerox = np.array(nonzero[1]) # Define a bounding box based on min/max x and y x_c = (np.max(nonzerox) + np.min(nonzerox)) / 2 y_c = (np.max(nonzeroy) + np.min(nonzeroy)) / 2 w = np.max(nonzerox) - np.min(nonzerox) h = np.max(nonzeroy) - np.min(nonzeroy) bboxes.append(BoundingBox([x_c, y_c, w, h])) return bboxes
def get_bounding_boxes(bounding_box_vals, w, h): # bounding_boxes = [BoundingBox(bounding_box_val) for bounding_box_val in bounding_box_vals] bounding_boxes = [ BoundingBox((bounding_box_val[0], 0, bounding_box_val[2], h)) for bounding_box_val in bounding_box_vals ] # Merge bounding boxes bounding_boxes = merge_bounding_boxes(bounding_boxes) # Evaluate letter width evaled_letter_width, evaled_avg_letter_width = eval_letter_width( bounding_boxes) # Combine bounding boxes that intersect but combined width is around letter width bounding_boxes = combine_horizontally(bounding_boxes, evaled_letter_width) # split boxes of n times width of most boxes into n boxes bounding_boxes = split_widths(bounding_boxes, evaled_avg_letter_width) return bounding_boxes
def execute_bounding_box(self, goal): self.bb_active = True mybb = BoundingBox(self.bb_done_callback, self.im_server, goal.image) rr = rospy.Rate(self.loop_rate) while (self.bb_active and (not self.bb_server.is_preempt_requested()) and (not rospy.is_shutdown())): rr.sleep() if self.bb_server.is_preempt_requested(): mybb.cancel() self.bb_server.set_preempted() else: result = mybb.get_result() if result is None: self.bb_server.set_aborted() else: (row1, row2, col1, col2) = result resp = BoundingBoxResult() resp.min_row.data = row1 resp.max_row.data = row2 resp.min_col.data = col1 resp.max_col.data = col2 self.bb_server.set_succeeded(resp)
def bbox_array_callback(self, msg): if msg.header.frame_id == "camera1": cam = self.cam1 elif msg.header.frame_id == "camera6": cam = self.cam6 else: rospy.logwarn( "unrecognized frame id {}, bounding box callback in road estimation fail", msg.header.frame_id) return # rospy.loginfo("camera {:d} message received!!".format(camera.id)) bboxes = [] for obj in msg.objects: # rospy.loginfo("{}".format(obj.label)) # rospy.loginfo("x:{} y:{} width:{} height:{} angle:{}".format(obj.x, obj.y, obj.width, obj.height, obj.angle)) bbox = BoundingBox(obj.x, obj.y, obj.width, obj.height, obj.angle, label=obj.label) bboxes.append(bbox) self.display_bboxes_in_world(cam, bboxes)