def plot_gt(self):
    """Draw the stored ground truth on the current matplotlib axes.

    Reads `self.gt_polygons` and `self.gt_rboxes` (both are assigned by
    `encode`) and draws
      * every polygon as a thick yellow outline,
      * every rbox as a blue outline, and
      * the first two columns of the rboxes as green dots.
    """
    ax = plt.gca()

    # ground truth polygons
    for polygon in self.gt_polygons:
        ax.add_patch(
            plt.Polygon(polygon, fill=False, edgecolor='y', linewidth=4))

    # ground truth rboxes
    rboxes = self.gt_rboxes
    for rbox in rboxes:
        box = rbox_to_polygon(rbox)
        ax.add_patch(
            plt.Polygon(box, fill=False, edgecolor='b', linewidth=2))

    # With no ground truth the rbox array is 1-D (shape (0,)), so the
    # column slices below would raise an IndexError; there is nothing to
    # plot in that case anyway.
    if len(rboxes) > 0:
        plt.plot(rboxes[:, 0], rboxes[:, 1], 'go', markersize=4)
def plot_results(self, results=None, show_labels=False, color='r'):
    """Draw the combined bounding boxes.

    # Arguments
        results: iterable of result rows; falls back to `self.results`
            when None. The first five values of a row are handed to
            `rbox_to_polygon`, the sixth one is used as label text.
        show_labels: if True, the sixth value of each row is written with
            two decimals at the first corner of the drawn polygon,
            rotated by the fifth rbox value (converted from radians to
            degrees).
        color: matplotlib color for the outline and the label background.
    """
    detections = self.results if results is None else results
    axes = plt.gca()

    for detection in detections:
        rbox = detection[:5]
        corners = np.flip(rbox_to_polygon(rbox), axis=0)  # TODO: fix this
        outline = plt.Polygon(
            corners, fill=False, edgecolor=color, linewidth=2)
        axes.add_patch(outline)

        if not show_labels:
            continue

        angle_deg = rbox[4] / np.pi * 180
        label_name = '%.2f' % (detection[5],)
        plt.text(corners[0, 0], corners[0, 1], label_name,
                 rotation=angle_deg,
                 bbox={'facecolor': color, 'alpha': 0.5})
def plot_rbox(box, color='r', linewidth=1):
    """Outline a single rbox on the current matplotlib axes.

    # Arguments
        box: rbox in the format accepted by `rbox_to_polygon`.
        color: matplotlib edge color of the outline.
        linewidth: line width of the outline.
    """
    corners = rbox_to_polygon(box)
    plt.gca().add_patch(
        plt.Polygon(corners, fill=False, edgecolor=color,
                    linewidth=linewidth))
def encode(self, gt_data, debug=False):
    """Encode ground truth polygons to segments and links for local
    classification and regression.

    Besides returning the encoded array, the call stores the converted
    ground truth on the instance (`gt_rboxes`, `gt_polygons`) and the
    intermediate results on every entry of `self.prior_maps`
    (`match_indices`, `segment_labels`, `segment_offsets`,
    `inter_layer_links_labels`, `cross_layer_links_labels`).

    # Arguments
        gt_data: iterable of ground truth words. The first 8 values of
            each word are read as 4 xy corner points and multiplied by
            (image_w, image_h), i.e. they are presumably normalized
            coordinates; further values are not used here.
        debug: if True, the local geometry of the first positive prior of
            each prior map is plotted and the remaining offsets of that
            map are left at zero.

    # Return
        Array of shape (priors, 2 segment_labels + 5 segment_offsets +
        2*8 inter_layer_links_labels + 2*4 cross_layer_links_labels).
    """
    rboxes = []
    polygons = []
    for word in gt_data:
        # the multiplication already yields a new array, no copy needed
        xy = np.reshape(word[:8], (-1, 2)) * (self.image_w, self.image_h)
        polygons.append(xy)
        rboxes.append(polygon_to_rbox(xy))
    rboxes = self.gt_rboxes = np.array(rboxes)
    self.gt_polygons = np.array(polygons)

    # compute segments
    for i, m in enumerate(self.prior_maps):
        num_priors = len(m.priors)
        a_l = m.minmax_size[0]
        # assumes one xy row per prior -- TODO confirm against the prior map
        priors_xy = np.asarray(m.priors_xy).reshape(-1, 2)

        # assign gt to priors
        match_indices = np.full(num_priors, -1, dtype=np.int32)
        # Kept in float64 so that it compares exactly against the float64
        # ratios below; with a float32 buffer, ties between words with the
        # same ratio were decided by the rounding direction of the stored
        # value. Now the first matching word wins a tie.
        min_lhs_eq_11 = np.full(num_priors, np.inf)
        for gt_idx, rbox in enumerate(rboxes):
            _, _, w, h, theta = rbox
            # constraint on ratio between box size and word height,
            # equation (11)
            lhs_eq_11 = max(a_l / h, h / a_l)
            if lhs_eq_11 <= 1.5:
                # prior centers relative to the word center, expressed in
                # the frame of the rotated word; evaluated for all priors
                # at once instead of one Python iteration per prior
                d = np.abs(np.dot(priors_xy - rbox[:2], rot_matrix(theta).T))
                # is center of prior in gt rbox
                inside = (d[:, 0] < w / 2.) & (d[:, 1] < h / 2.)
                # is lhs of equation (11) minimal for prior
                better = inside & (lhs_eq_11 < min_lhs_eq_11)
                min_lhs_eq_11[better] = lhs_eq_11
                match_indices[better] = gt_idx
        m.match_indices = match_indices
        segment_mask = match_indices != -1

        # segment labels, column 0 is negative and column 1 is positive
        m.segment_labels = np.empty((num_priors, 2), dtype=np.int8)
        m.segment_labels[:, 0] = np.logical_not(segment_mask)
        m.segment_labels[:, 1] = segment_mask

        # compute offsets only for assigned boxes
        m.segment_offsets = np.zeros((num_priors, 5))
        pos_segment_idxs = np.nonzero(segment_mask)[0]
        for j in pos_segment_idxs:
            rbox = rboxes[match_indices[j]]
            _, _, w, h, theta = rbox
            R = rot_matrix(theta)
            prior_w, prior_h = m.priors_wh[j]

            # step 2 figure 5, rotate word anticlockwise around the center
            # of prior
            d = rbox[:2] - m.priors_xy[j]
            poly_loc = rbox_to_polygon(list(d) + [w, h, theta])
            poly_loc_rot = np.dot(poly_loc, R.T)

            # step 3 figure 5, crop word to left and right of prior
            poly_loc_cropped = np.copy(poly_loc_rot)
            poly_loc_cropped[:, 0] = np.clip(
                poly_loc_cropped[:, 0], -prior_w / 2., prior_w / 2.)

            # step 4 figure 5, rotate cropped word box clockwise
            poly_loc_rot_back = np.dot(poly_loc_cropped, R)
            rbox_loc_rot_back = polygon_to_rbox(poly_loc_rot_back)

            # encode, solve (3) to (7) to get local offsets
            offset = np.concatenate([
                rbox_loc_rot_back[:2] / a_l,
                np.log(rbox_loc_rot_back[2:4] / a_l),
                rbox_loc_rot_back[4:5],
            ])
            offset[:4] /= m.priors[j, -4:]  # variances
            m.segment_offsets[j] = offset

            # for debugging local geometry
            if debug:
                prior_poly_loc = np.array(
                    [[-prior_w, +prior_h],
                     [+prior_w, +prior_h],
                     [+prior_w, -prior_h],
                     [-prior_w, -prior_h]]) / 2.
                plt.figure(figsize=[10] * 2)
                ax = plt.gca()
                debug_polygons = (
                    (prior_poly_loc, 'r'),
                    (poly_loc, 'b'),
                    (poly_loc_rot, 'k'),
                    (poly_loc_cropped, 'c'),
                    (poly_loc_rot_back, 'y'),
                )
                for poly, edgecolor in debug_polygons:
                    ax.add_patch(
                        plt.Polygon(poly, fill=False, edgecolor=edgecolor,
                                    linewidth=1))
                lim = 50
                plt.xlim(-lim, lim)
                plt.ylim(-lim, lim)
                plt.grid()
                plt.show()
                # one figure per prior map is enough when debugging
                break

        # compute link labels
        m.inter_layer_links_labels = np.zeros((num_priors, 16), dtype=np.int8)
        m.cross_layer_links_labels = np.zeros((num_priors, 8), dtype=np.int8)
        # the first map has no previous map to link to
        previous_map = self.prior_maps[i - 1] if i > 0 else None
        # we only have to check neighbors if we are positive
        for idx in pos_segment_idxs:
            neighbor_idxs = m.inter_layer_neighbors_idxs[idx]
            for n, neighbor_idx in enumerate(neighbor_idxs):
                # Only valid neighbors may be looked up, hence the
                # short-circuit. Since we are positive and matched to the
                # same word, the neighbor has to be positive as well.
                if (m.inter_layer_neighbors_valid[idx, n]
                        and match_indices[idx] == match_indices[neighbor_idx]):
                    m.inter_layer_links_labels[idx, n * 2 + 1] = 1

            if previous_map is not None:
                neighbor_idxs = m.cross_layer_neighbors_idxs[idx]
                for n, neighbor_idx in enumerate(neighbor_idxs):
                    # cross layer neighbors are always valid
                    if (match_indices[idx]
                            == previous_map.match_indices[neighbor_idx]):
                        m.cross_layer_links_labels[idx, n * 2 + 1] = 1

        # even columns are the negative counterpart of the odd columns
        m.inter_layer_links_labels[:, ::2] = np.logical_not(
            m.inter_layer_links_labels[:, 1::2])
        m.cross_layer_links_labels[:, ::2] = np.logical_not(
            m.cross_layer_links_labels[:, 1::2])

    # collect encoded ground truth
    maps = self.prior_maps
    segment_labels = np.concatenate([m.segment_labels for m in maps])
    segment_offsets = np.concatenate([m.segment_offsets for m in maps])
    inter_layer_links_labels = np.concatenate(
        [m.inter_layer_links_labels for m in maps])
    cross_layer_links_labels = np.concatenate(
        [m.cross_layer_links_labels for m in maps])
    return np.concatenate([
        segment_labels, segment_offsets, inter_layer_links_labels,
        cross_layer_links_labels
    ], axis=1)